From 79c94a1275bfe2f1e4deae927f288425bb9eaa33 Mon Sep 17 00:00:00 2001
From: Robert Shaw
Date: Tue, 23 Apr 2024 22:05:46 +0000
Subject: [PATCH 01/90] fixed fp8 conflict with aqlm

---
 vllm/model_executor/layers/quantization/fp8.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py
index 8df82e0e18edd..01e494c870e71 100644
--- a/vllm/model_executor/layers/quantization/fp8.py
+++ b/vllm/model_executor/layers/quantization/fp8.py
@@ -64,12 +64,13 @@ def create_weights(
         self,
         layer: torch.nn.Module,
         input_size_per_partition: int,
-        output_size_per_partition: int,
+        output_partition_sizes: List[int],
         input_size: int,
         output_size: int,
         params_dtype: torch.dtype,
         **extra_weight_attrs,
     ):
+        output_size_per_partition = sum(output_partition_sizes)
         weight = Parameter(torch.empty(output_size_per_partition,
                                        input_size_per_partition,
                                        dtype=params_dtype),
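A note on the PATCH 01 signature change: fused layers such as MergedColumnParallelLinear and QKVParallelLinear allocate one weight tensor that covers several logical outputs, so create_weights now receives the per-shard output sizes (and sums them) instead of a single pre-summed int. Keeping the per-shard sizes is what later lets a quantization method hold one scale per logical shard. A minimal sketch of the relationship, with illustrative sizes that are assumptions rather than values from this series:

    # Illustrative sketch: how per-shard output sizes relate to the single
    # output_size_per_partition value the old signature received.
    num_heads, num_kv_heads, head_dim = 32, 32, 128  # assumed example config
    output_partition_sizes = [
        num_heads * head_dim,     # q_proj shard
        num_kv_heads * head_dim,  # k_proj shard
        num_kv_heads * head_dim,  # v_proj shard
    ]
    output_size_per_partition = sum(output_partition_sizes)  # what create_weights allocates
    assert output_size_per_partition == 12288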
From f8b57e4320303b6001949fd57ba7ce12892466df Mon Sep 17 00:00:00 2001
From: Robert Shaw
Date: Tue, 23 Apr 2024 22:10:05 +0000
Subject: [PATCH 02/90] added quantization tests to buildkite

---
 .buildkite/test-pipeline.yaml  | 3 +++
 tests/quantization/test_fp8.py | 6 +++---
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index f7c1569696249..11cda053260ec 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -96,6 +96,9 @@ steps:
 - label: Metrics Test
   command: pytest -v -s metrics
 
+- label: Quantization Test
+  command: pytest -v -s quantization
+
 - label: Benchmarks
   working_dir: "/vllm-workspace/.buildkite"
   commands:
diff --git a/tests/quantization/test_fp8.py b/tests/quantization/test_fp8.py
index fa10e60de10a7..d643ebd38bb5d 100644
--- a/tests/quantization/test_fp8.py
+++ b/tests/quantization/test_fp8.py
@@ -12,9 +12,9 @@
 capability = capability[0] * 10 + capability[1]
 
 
-@pytest.mark.skipif(
-    capability < QUANTIZATION_METHODS["fp8"].get_min_capability(),
-    reason="FP8 is not supported on this GPU type.")
+# @pytest.mark.skipif(
+#     capability < QUANTIZATION_METHODS["fp8"].get_min_capability(),
+#     reason="FP8 is not supported on this GPU type.")
 def test_load_fp16_model(vllm_runner) -> None:
     llm = vllm_runner("facebook/opt-125m", quantization="fp8")

From 7175e5b119e1cebdabe7202a5a0387a77ae80c72 Mon Sep 17 00:00:00 2001
From: Robert Shaw
Date: Tue, 23 Apr 2024 22:11:40 +0000
Subject: [PATCH 03/90] removed commented out piece

---
 tests/quantization/test_fp8.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/quantization/test_fp8.py b/tests/quantization/test_fp8.py
index d643ebd38bb5d..fa10e60de10a7 100644
--- a/tests/quantization/test_fp8.py
+++ b/tests/quantization/test_fp8.py
@@ -12,9 +12,9 @@
 capability = capability[0] * 10 + capability[1]
 
 
-# @pytest.mark.skipif(
-#     capability < QUANTIZATION_METHODS["fp8"].get_min_capability(),
-#     reason="FP8 is not supported on this GPU type.")
+@pytest.mark.skipif(
+    capability < QUANTIZATION_METHODS["fp8"].get_min_capability(),
+    reason="FP8 is not supported on this GPU type.")
 def test_load_fp16_model(vllm_runner) -> None:
     llm = vllm_runner("facebook/opt-125m", quantization="fp8")

From 7a7520de4037161c14f6c7acff68c6f86e499fb8 Mon Sep 17 00:00:00 2001
From: Robert Shaw
Date: Tue, 23 Apr 2024 23:26:05 +0000
Subject: [PATCH 04/90] model loaded!

---
 tests/quantization/test_fp8.py       |  7 +++
 vllm/model_executor/layers/linear.py | 46 +++++++++++++++++++
 .../layers/quantization/__init__.py  |  2 +
 vllm/model_executor/models/llama.py  |  1 +
 4 files changed, 56 insertions(+)

diff --git a/tests/quantization/test_fp8.py b/tests/quantization/test_fp8.py
index fa10e60de10a7..c121d25daebe7 100644
--- a/tests/quantization/test_fp8.py
+++ b/tests/quantization/test_fp8.py
@@ -10,7 +10,14 @@
 capability = torch.cuda.get_device_capability()
 capability = capability[0] * 10 + capability[1]
+print(capability)
 
+@pytest.mark.skipif(
+    capability < QUANTIZATION_METHODS["fp8_static"].get_min_capability(),
+    reason="FP8 is not supported on this GPU type.")
+def test_load_static_model(vllm_runner) -> None:
+    llm = vllm_runner("FriendliAI/Llama-2-7b-chat-hf-fp8", quantization="fp8_static")
+    print(llm)
 
 @pytest.mark.skipif(
     capability < QUANTIZATION_METHODS["fp8"].get_min_capability(),
diff --git a/vllm/model_executor/layers/linear.py b/vllm/model_executor/layers/linear.py
index e56af9075e2fd..9da2a6e1287c8 100644
--- a/vllm/model_executor/layers/linear.py
+++ b/vllm/model_executor/layers/linear.py
@@ -285,6 +285,22 @@ def weight_loader(self,
         param_data = param.data
         output_dim = getattr(param, "output_dim", None)
         is_metadata = getattr(param, "is_metadata", False)
+
+        # TODO: document.
+        # TODO: sync with is_metadata.
+        # For loading scales.
+        param_shard_splitter = getattr(param, "shard_splitter", None)
+        if output_dim is not None and param_shard_splitter is not None:
+            raise NotImplementedError(
+                "We do not currently support output_dim != None and "
+                "shard_splitter != None for a parameter. Please open an issue."
+            )
+        if loaded_shard_id is None and param_shard_splitter is not None:
+            raise NotImplementedError(
+                "We do not currently support loaded_shard_id == None and "
+                "shard_splitter != None for a parameter. Please open an issue."
+            )
+
         if loaded_shard_id is None:
             # Loaded weight is already packed.
             if output_dim is None:
@@ -342,6 +358,13 @@ def weight_loader(self,
             shard_size = loaded_weight.shape[0]
             shard_offset = loaded_shard_id * shard_size
             param_data = param_data.narrow(0, shard_offset, shard_size)
+
+        # TODO: sync with is_metadata UX.
+        # If a param_shard_splitter is defined by the LinearMethod, use it.
+        elif param_shard_splitter is not None:
+            param_data, loaded_weight = param_shard_splitter(
+                param_data, loaded_weight, loaded_shard_id)
+
         else:
             ignore_warning = getattr(param, "ignore_warning", False)
             if not ignore_warning:
@@ -423,6 +446,19 @@ def weight_loader(self,
         param_data = param.data
         output_dim = getattr(param, "output_dim", None)
         is_metadata = getattr(param, "is_metadata", False)
+
+        # TODO: sync with is_metadata UX
+        param_shard_splitter = getattr(param, "shard_splitter", None)
+        if output_dim is not None and param_shard_splitter is not None:
+            raise NotImplementedError(
+                "We do not currently support output_dim != None and "
+                "shard_splitter != None for a parameter. Please open an issue."
+            )
+        if loaded_shard_id is None and param_shard_splitter is not None:
+            raise NotImplementedError(
+                "We do not currently support loaded_shard_id == None and "
+                "shard_splitter != None for a parameter. Please open an issue."
+            )
 
         if loaded_shard_id is None:
             # Loaded weight is already packed.
@@ -496,6 +532,11 @@ def weight_loader(self,
             shard_index = ["q", "k", "v"].index(loaded_shard_id)
             param_data = param_data.narrow(0, shard_index * shard_size,
                                            shard_size)
+        # TODO: sync with QKV
+        # If a param_shard_splitter is defined by the LinearMethod, use it.
+        elif param_shard_splitter is not None:
+            param_data, loaded_weight = param_shard_splitter(
+                param_data, loaded_weight, loaded_shard_id)
         else:
             ignore_warning = getattr(param, "ignore_warning", False)
             if not ignore_warning:
@@ -592,6 +633,11 @@ def weight_loader(self, param: Parameter, loaded_weight: torch.Tensor):
         start_idx = tp_rank * shard_size
         loaded_weight = loaded_weight.narrow(input_dim, start_idx,
                                              shard_size)
+
+        # TODO: canon
+        # This is for loading scales for fp8, which have no dims.
+        if len(loaded_weight.shape) == 0:
+            loaded_weight = loaded_weight.reshape(1)
+
         assert param_data.shape == loaded_weight.shape
         param_data.copy_(loaded_weight)
diff --git a/vllm/model_executor/layers/quantization/__init__.py b/vllm/model_executor/layers/quantization/__init__.py
index a525add458499..c139331307872 100644
--- a/vllm/model_executor/layers/quantization/__init__.py
+++ b/vllm/model_executor/layers/quantization/__init__.py
@@ -5,6 +5,7 @@
 from vllm.model_executor.layers.quantization.base_config import (
     QuantizationConfig)
 from vllm.model_executor.layers.quantization.fp8 import FP8Config
+from vllm.model_executor.layers.quantization.fp8_static import FP8StaticConfig
 from vllm.model_executor.layers.quantization.gptq import GPTQConfig
 from vllm.model_executor.layers.quantization.marlin import MarlinConfig
 from vllm.model_executor.layers.quantization.squeezellm import SqueezeLLMConfig
@@ -13,6 +14,7 @@
     "aqlm": AQLMConfig,
     "awq": AWQConfig,
     "fp8": FP8Config,
+    "fp8_static": FP8StaticConfig,
     "gptq": GPTQConfig,
     "squeezellm": SqueezeLLMConfig,
     "marlin": MarlinConfig,
diff --git a/vllm/model_executor/models/llama.py b/vllm/model_executor/models/llama.py
index 016e3b039d1e8..d542d415e6cf7 100644
--- a/vllm/model_executor/models/llama.py
+++ b/vllm/model_executor/models/llama.py
@@ -386,6 +386,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
         ]
         params_dict = dict(self.named_parameters())
         for name, loaded_weight in weights:
+            print(f"----- {name}")
             if "rotary_emb.inv_freq" in name:
                 continue
             if ("rotary_emb.cos_cached" in name
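The shard_splitter hook added above is what lets weight_loader route parameters that cannot simply be narrowed along output_dim, such as per-shard fp8 scales. From the call sites, the contract is param_data, loaded_weight = shard_splitter(param_data, loaded_weight, loaded_shard_id). A rough sketch of a splitter a quantization method might attach to a scale parameter; the helper below is an assumption for illustration, not code from this branch:

    import torch
    from torch.nn import Parameter

    # Assumed sketch: one scale slot per logical shard of a fused linear layer.
    def scale_shard_splitter(param_data: torch.Tensor,
                             loaded_weight: torch.Tensor,
                             loaded_shard_id):
        # QKVParallelLinear passes "q"/"k"/"v"; MergedColumnParallelLinear
        # passes integer shard ids.
        idx = {"q": 0, "k": 1, "v": 2}.get(loaded_shard_id, loaded_shard_id)
        # Checkpoints may store a scale as a 0-dim scalar; give it a dim so the
        # final copy_ lines up (same reason as the RowParallelLinear reshape).
        if loaded_weight.dim() == 0:
            loaded_weight = loaded_weight.reshape(1)
        return param_data.narrow(0, idx, 1), loaded_weight

    scales = Parameter(torch.empty(3), requires_grad=False)
    # vLLM attaches loader hints via set_weight_attrs; plain setattr shows the idea.
    scales.shard_splitter = scale_shard_splitter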
From e0b4d727c011732db0ba9dc62864f354437e0ccd Mon Sep 17 00:00:00 2001
From: Robert Shaw
Date: Tue, 23 Apr 2024 23:28:21 +0000
Subject: [PATCH 05/90] renamed

---
 vllm/model_executor/layers/linear.py | 28 ++++++++++++++--------------
 vllm/model_executor/models/llama.py  |  1 -
 2 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/vllm/model_executor/layers/linear.py b/vllm/model_executor/layers/linear.py
index 9da2a6e1287c8..01af70a34c488 100644
--- a/vllm/model_executor/layers/linear.py
+++ b/vllm/model_executor/layers/linear.py
@@ -289,16 +289,16 @@ def weight_loader(self,
         # TODO: document.
         # TODO: sync with is_metadata.
         # For loading scales.
-        param_shard_splitter = getattr(param, "shard_splitter", None)
-        if output_dim is not None and param_shard_splitter is not None:
+        shard_indexer = getattr(param, "shard_indexer", None)
+        if output_dim is not None and shard_indexer is not None:
             raise NotImplementedError(
                 "We do not currently support output_dim != None and "
-                "shard_splitter != None for a parameter. Please open an issue."
+                "shard_indexer != None for a parameter. Please open an issue."
             )
-        if loaded_shard_id is None and param_shard_splitter is not None:
+        if loaded_shard_id is None and shard_indexer is not None:
             raise NotImplementedError(
                 "We do not currently support loaded_shard_id == None and "
-                "shard_splitter != None for a parameter. Please open an issue."
+                "shard_indexer != None for a parameter. Please open an issue."
             )
 
         if loaded_shard_id is None:
@@ -361,8 +361,8 @@ def weight_loader(self,
 
         # TODO: sync with is_metadata UX.
         # If a param_shard_splitter is defined by the LinearMethod, use it.
-        elif param_shard_splitter is not None:
-            param_data, loaded_weight = param_shard_splitter(
+        elif shard_indexer is not None:
+            param_data, loaded_weight = shard_indexer(
                 param_data, loaded_weight, loaded_shard_id)
 
         else:
@@ -448,16 +448,16 @@ def weight_loader(self,
         is_metadata = getattr(param, "is_metadata", False)
 
         # TODO: sync with is_metadata UX
-        param_shard_splitter = getattr(param, "shard_splitter", None)
-        if output_dim is not None and param_shard_splitter is not None:
+        shard_indexer = getattr(param, "shard_indexer", None)
+        if output_dim is not None and shard_indexer is not None:
             raise NotImplementedError(
                 "We do not currently support output_dim != None and "
-                "shard_splitter != None for a parameter. Please open an issue."
+                "shard_indexer != None for a parameter. Please open an issue."
             )
-        if loaded_shard_id is None and param_shard_splitter is not None:
+        if loaded_shard_id is None and shard_indexer is not None:
             raise NotImplementedError(
                 "We do not currently support loaded_shard_id == None and "
-                "shard_splitter != None for a parameter. Please open an issue."
+                "shard_indexer != None for a parameter. Please open an issue."
             )
 
         if loaded_shard_id is None:
             # Loaded weight is already packed.
@@ -534,8 +534,8 @@ def weight_loader(self,
                                            shard_size)
         # TODO: sync with QKV
         # If a param_shard_splitter is defined by the LinearMethod, use it.
-        elif param_shard_splitter is not None:
-            param_data, loaded_weight = param_shard_splitter(
+        elif shard_indexer is not None:
+            param_data, loaded_weight = shard_indexer(
                 param_data, loaded_weight, loaded_shard_id)
         else:
             ignore_warning = getattr(param, "ignore_warning", False)
diff --git a/vllm/model_executor/models/llama.py b/vllm/model_executor/models/llama.py
index d542d415e6cf7..016e3b039d1e8 100644
--- a/vllm/model_executor/models/llama.py
+++ b/vllm/model_executor/models/llama.py
@@ -386,7 +386,6 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
         ]
         params_dict = dict(self.named_parameters())
         for name, loaded_weight in weights:
-            print(f"----- {name}")
             if "rotary_emb.inv_freq" in name:
                 continue
             if ("rotary_emb.cos_cached" in name
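The fp8_static method registered in PATCH 04 comes from an fp8_static.py module that is not part of this excerpt, and the test in the next commit gates on its get_min_capability(). A minimal stand-in for the surface those call sites rely on; the class body and the capability threshold are assumptions, not the branch's actual FP8StaticConfig:

    import torch

    # Assumed stand-in for the QuantizationConfig subclass in fp8_static.py.
    class FP8StaticConfig:

        @classmethod
        def get_name(cls) -> str:
            return "fp8_static"

        @classmethod
        def get_min_capability(cls) -> int:
            # Compared against major * 10 + minor from
            # torch.cuda.get_device_capability(); 89 (Ada/Hopper-class fp8
            # hardware) is an assumed threshold.
            return 89

    QUANTIZATION_METHODS = {"fp8_static": FP8StaticConfig}  # as registered above

    if torch.cuda.is_available():
        major, minor = torch.cuda.get_device_capability()
        supported = (major * 10 + minor
                     >= QUANTIZATION_METHODS["fp8_static"].get_min_capability())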
From f96428e692d2bccefa4799b7815ff63faf49442b Mon Sep 17 00:00:00 2001
From: Robert Shaw
Date: Wed, 24 Apr 2024 01:58:36 +0000
Subject: [PATCH 06/90] stash

---
 tests/quantization/test_fp8.py      | 9 ++++++---
 vllm/model_executor/models/llama.py | 5 +++++
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/tests/quantization/test_fp8.py b/tests/quantization/test_fp8.py
index c121d25daebe7..6e38d570f8541 100644
--- a/tests/quantization/test_fp8.py
+++ b/tests/quantization/test_fp8.py
@@ -5,19 +5,22 @@
 import pytest
 import torch
 
+from vllm import SamplingParams
 from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS
 from vllm.model_executor.layers.quantization.fp8 import Fp8LinearMethod
 
 capability = torch.cuda.get_device_capability()
 capability = capability[0] * 10 + capability[1]
-print(capability)
 
 
 @pytest.mark.skipif(
     capability < QUANTIZATION_METHODS["fp8_static"].get_min_capability(),
     reason="FP8 is not supported on this GPU type.")
 def test_load_static_model(vllm_runner) -> None:
-    llm = vllm_runner("FriendliAI/Llama-2-7b-chat-hf-fp8", quantization="fp8_static")
-    print(llm)
+    llm = vllm_runner("FriendliAI/Llama-2-7b-chat-hf-fp8", quantization="fp8_static", enforce_eager=True)
+    model = llm.model.llm_engine.model_executor.driver_worker.model_runner.model
+    fc1 = model.model.decoder.layers[0].fc1
+    assert isinstance(fc1.linear_method, Fp8LinearMethod)
+    print(llm.generate("Hello my name is", SamplingParams(max_tokens=20)))
 
 @pytest.mark.skipif(
     capability < QUANTIZATION_METHODS["fp8"].get_min_capability(),
diff --git a/vllm/model_executor/models/llama.py b/vllm/model_executor/models/llama.py
index 016e3b039d1e8..22957a78cbc52 100644
--- a/vllm/model_executor/models/llama.py
+++ b/vllm/model_executor/models/llama.py
@@ -276,10 +276,13 @@ def forward(
         attn_metadata: AttentionMetadata,
         inputs_embeds: Optional[torch.Tensor] = None,
     ) -> torch.Tensor:
+        self.embed_tokens.weight[0] = torch.Tensor([-2.0336052269225574e-36, 3.3207715410729622e-37, -1.5516525430854195e-35, 1.2319180796617573e-35, ...])

[The torch.Tensor literal above continues for several hundred more hardcoded debug values; the excerpt is cut off mid-tensor.]
-1.2131101700486007e-35, 5.900981641127883e-36, 6.817867234769268e-36, 7.85230226349288e-36, 1.2789378536946488e-35, -9.215875710446734e-36, -8.13442090769023e-36, -3.009265538105056e-36, -3.079795199154393e-36, 2.7859216114488214e-36, 3.4236272967699124e-37, -8.87498234870827e-37, 3.855621470697103e-36, -2.621352402333701e-36, 1.8337711872827685e-36, -9.07481638834806e-36, -5.242704804667402e-36, -5.47780367483186e-36, 2.4685381367268038e-36, -4.255289549976681e-36, -1.5140367238591063e-35, -1.0109251417071673e-35, 1.586917373610088e-36, -5.5953531099140885e-36, -9.968192094972998e-36, -1.0109251417071673e-36, -1.0626468931433479e-35, -2.0218502834143345e-36, -5.5542108076353085e-37, -1.3259576277275403e-35, -4.6549576292562585e-36, -5.0311158215193905e-36, 1.9983403963978888e-36, 1.7632415262334313e-36, 5.5953531099140885e-36, 7.288064975098183e-36, 6.6415430821459244e-37, 8.632536638851174e-38, 1.0861567801597937e-35, 1.128474576789396e-35, -1.0579449157400588e-35, -1.88079096131566e-37, 1.570460452698576e-35, 4.208269775943789e-36, -7.617203393328423e-36, -1.6739039555709374e-35, -2.3274788146281293e-36, 1.96307556587322e-36, 9.874152546907215e-36, 2.1276447749883404e-36, -8.745677970117819e-36, -5.524823448864751e-36, -6.864887008802159e-36, 3.2326094647612906e-37, 4.513898307157584e-36, -9.686073450775649e-36, 1.8220162437745456e-36, 1.5234406786656846e-35, -1.5398975995771966e-36, -9.07481638834806e-36, -8.228460455756013e-36, 1.1049646897729503e-35, 1.6574470346594254e-36, -7.335084749131074e-36, -4.984096047486499e-36, 5.47780367483186e-36, 4.9370762734536075e-36, -2.527312854267918e-36, 9.73309322480854e-36, -3.408933617384634e-36, 1.3259576277275403e-35, -6.723827686703485e-36, -1.5398975995771966e-36, 6.700317799687039e-37, 1.1637394073140646e-36, -2.926980933547496e-36, -1.2789378536946488e-35, -9.309915258512517e-36, -1.4752454102819708e-36, 1.9043008483321058e-36, -1.4482090402130582e-35, 4.231779662960235e-37, 7.85230226349288e-36, -9.462729524119414e-37, -1.9395656788567744e-36, 1.610427260626534e-36, -9.07481638834806e-36, -8.13442090769023e-36, -9.545014128676975e-36, 7.899322037525772e-36, -1.2601299440814922e-35, 2.1441016958998524e-35, -4.419858759091801e-36, 6.206610172341678e-36, 4.1436175866485635e-37, -7.170515540015954e-37, 1.598672317118311e-35, 2.0923799444636718e-36, -1.4858248594393714e-35, -2.1276447749883404e-36, 1.7632415262334313e-36, 9.309915258512517e-36, -5.830451980078546e-36, 5.360254239749631e-36, -3.479463278433971e-36, -7.335084749131074e-36, 5.7834322060456545e-36, -2.3274788146281293e-36, 1.1566864412091309e-35, -8.839717518183602e-36, -1.6080762719248893e-35, -1.774996469741654e-36, -9.874152546907215e-36, 8.087401133657338e-36, -3.92615113174644e-36, -3.291384182302405e-36, -8.05213630313267e-37, 4.4668785331246925e-36, 8.999878623483139e-39, -5.172175143618065e-37, -5.242704804667402e-36, -1.8440567628524635e-37, -1.0062231643038781e-35, 1.1566864412091309e-35, 2.5743326283008096e-36, -3.973170905779332e-36, -5.7834322060456545e-36, 9.121836162380951e-36, 4.302309324009572e-36, 1.814963277669612e-35, -5.47780367483186e-36, -4.760752120830264e-37, -9.07481638834806e-36, -4.231779662960235e-36, -5.101645482568728e-36, 4.796016951354933e-36, 9.168855936413843e-36, 1.720923729603829e-35, -4.137740114894452e-36, -4.255289549976681e-36, 9.73309322480854e-36, -2.9740007075803874e-36, 3.76158192263132e-36, 4.419858759091801e-36, 8.9925317837905e-37, -5.948001415160775e-36, -1.3958995416014664e-37, 4.4668785331246925e-36, -1.8337711872827685e-36, 
-1.8337711872827685e-36, -6.018531076210112e-36, 2.915225990039273e-36, -5.3132344657167395e-36, -1.1872492943305104e-36, 4.419858759091801e-36, -1.3635734469538535e-35, 1.6080762719248893e-35, -1.6971199689996776e-37, 6.112570624275895e-36, 2.456783193218581e-36, 5.571843222897643e-36, -1.2695338988880705e-35, -1.4670169498262148e-35, 8.087401133657338e-36, -4.231779662960235e-36, 1.5398975995771966e-36, 3.103305086170839e-36, 8.651638422052036e-36, 8.040381359624447e-36, -4.349329098042464e-36, 5.172175143618065e-36, -1.1108421615270617e-36, -1.2413220344683356e-35, 2.4685381367268038e-36, -9.309915258512517e-36, 5.900981641127883e-36, 2.080625000955449e-36, 1.2131101700486007e-35, 1.3812058622161878e-37, 4.760752120830264e-37, 4.584427968206921e-36, 6.58276836460481e-36, -2.2099293795459005e-35, -1.504632769052528e-36, -2.6801271198748155e-36, -1.1660903960157092e-35, -9.07481638834806e-36, 2.5860875718090325e-36, -9.121836162380951e-36, 1.2283915966092904e-36, 9.93292726444833e-37, -6.723827686703485e-36, -1.0109251417071673e-35, -5.47780367483186e-36, -6.159590398308787e-36, -3.879131357713549e-36, 8.79269774415071e-36, 3.879131357713549e-36, -9.592033902709866e-36, 9.027796614315168e-36, -1.4458580515114136e-36, 6.535748590571919e-36, 6.441709042506136e-36, -1.2989212576586277e-36, -3.0562853121379475e-36, -1.1002627123696611e-35, 3.6205226005326455e-36, 6.676807912670593e-36, 1.598672317118311e-35, 1.5634074865936424e-36, 1.2131101700486007e-35, 7.335084749131074e-36, 8.087401133657338e-36, 1.3635734469538535e-35, -1.2695338988880705e-35, -1.8572810742992143e-36, -6.629788138637702e-36, -1.0297330513203239e-35, -3.92615113174644e-36, 2.527312854267918e-36, 3.6205226005326455e-36, 2.621352402333701e-36, -5.936246471652552e-37, -2.73890183741593e-36, -1.5281426560689738e-36, 8.933757066249385e-36, 3.785091809647766e-36, -9.686073450775649e-36, -1.410593220986745e-36, -8.557598873986253e-36, -7.899322037525772e-36, 1.774996469741654e-36, 1.034435028723613e-35, -4.419858759091801e-36, 1.3729774017604318e-35, 9.450974580611192e-36, 8.369519777854687e-36, 1.6339371476429796e-36, 5.148665256601619e-36, 6.700317799687039e-37, 3.667542374565537e-36, -5.289724578700294e-36, -3.549992939483308e-36, 2.997510594596833e-36, 7.335084749131074e-36, -1.2460240118716248e-36, 8.087401133657338e-36, -1.9936384189945996e-35, 6.159590398308787e-36, -9.545014128676975e-36, -4.760752120830264e-37, -1.9513206223649973e-36, -2.915225990039273e-36, -1.0062231643038781e-35, 1.7021158199906723e-35, -1.516387712560751e-36, -1.5398975995771966e-36, -1.0050476699530558e-36, -1.0461899722318359e-36, -4.8430367253878245e-36, -7.899322037525772e-36, 3.644032487549091e-36, -1.88079096131566e-36, -4.70197740328915e-38, -1.087332274510616e-36, -2.915225990039273e-36, -1.692711865184094e-35, -1.570460452698576e-35, 5.736412432012763e-36, 2.9740007075803874e-36, -9.462729524119414e-37, -6.347669494440353e-36, 6.535748590571919e-36, -1.9983403963978888e-36, -4.349329098042464e-36, 7.85230226349288e-36, -1.1931267660846218e-36, -3.6205226005326455e-36, 1.5234406786656846e-35, 9.110081218872728e-37, -2.3980084756774665e-36, -2.9740007075803874e-36, 2.747718045047097e-37, -2.245194210070569e-36, 1.344765537340697e-35, 1.2789378536946488e-35, -4.419858759091801e-36, -8.322500003821796e-36, -1.7773474584432987e-35, -4.607937855223367e-36, -6.206610172341678e-36, -1.7118136483849562e-37, -5.101645482568728e-36, -2.1393997184965633e-36, 4.0907203408615605e-36, 1.704466808692317e-36, 4.70197740328915e-36, 2.1723135603195873e-35, 
-5.242704804667402e-36, -3.032775425121502e-36, -3.949661018762886e-36, 7.099985878966617e-36, -1.128474576789396e-35, -9.121836162380951e-36, -4.772507064338487e-36, -9.545014128676975e-36, 1.0520674439859473e-36, 6.629788138637702e-36, 1.9043008483321058e-36, 5.830451980078546e-36, -1.128474576789396e-36, 1.8734441216230207e-37, 6.817867234769268e-36, -9.051306501331614e-37, 4.53740819417403e-36, -1.9513206223649973e-36, 2.5155579107596953e-36, 2.82118644197349e-37, 3.3501588998435194e-37, 3.9026412447299945e-36, -2.151154662004786e-36, -7.85230226349288e-36, 2.1041348879718946e-36, 8.087401133657338e-36, -4.86654661240427e-36, -2.715391950399484e-36, -5.360254239749631e-36, 8.040381359624447e-36, 1.7491355940235638e-35, -1.0579449157400588e-35, 2.5155579107596953e-36, 4.419858759091801e-36, 1.946618644961708e-35, 6.171345341817009e-37, 1.0109251417071673e-36, 3.9966807927957775e-36, -1.5140367238591063e-35, -1.1660903960157092e-35, -9.497994354644083e-36, 1.6221822041347568e-36, 7.699487997885983e-37, 2.621352402333701e-36, -3.8086016966642115e-36, -3.6205226005326455e-36, -1.2131101700486007e-35, -2.5860875718090325e-36, -3.408933617384634e-36, 1.0109251417071673e-36, -2.4215183626939123e-36, 9.4039548065783e-36, -4.043700566828669e-36, -4.643202685748036e-37, -1.075577331002393e-36, -1.2989212576586277e-36, 1.3729774017604318e-35, 3.9026412447299945e-36, 7.052966104933725e-36, -3.220854521253068e-36, 1.4199971757933233e-35, -1.974830509381443e-35, 1.7115197747972506e-35, 5.101645482568728e-36, -5.7834322060456545e-36, -9.027796614315168e-36, 6.488728816539027e-36, -6.441709042506136e-36, -1.3635734469538535e-35, -1.3259576277275403e-35, 5.195685030634511e-36, -5.360254239749631e-36, 7.4350017689509685e-37, 5.383764126766077e-36, 9.345180089037186e-37, -7.85230226349288e-36, 8.886737292216494e-36, 6.864887008802159e-36, -1.5634074865936424e-36, 9.545014128676975e-36, 1.5892683623117327e-35, 1.4576129950196365e-35, -1.5610564978919978e-35, 4.419858759091801e-36, 1.034435028723613e-35, 7.52316384526264e-36, 3.9966807927957775e-36, 1.7021158199906723e-35, 6.300649720407461e-36, 2.2804590405952378e-36, 1.189600283032155e-35, -1.0062231643038781e-35, 3.502973165450417e-36, -1.9160557918403286e-36, 1.4294011305999016e-35, 2.550822741284364e-36, 3.549992939483308e-36, -2.6801271198748155e-36, 2.5743326283008096e-36, -1.1660903960157092e-35, -6.629788138637702e-36, -3.291384182302405e-36, 7.85230226349288e-36, -5.948001415160775e-36, -1.5634074865936424e-36, 1.1637394073140646e-36, -3.385423730368188e-36, -5.195685030634511e-36, 2.0218502834143345e-36, 4.6549576292562585e-36, 4.984096047486499e-36, -1.8587504422377421e-37, 1.6174802267314676e-35, -5.5013135618483055e-36, 1.610427260626534e-36, -5.6893926579798715e-36, -1.986585452889666e-36, -1.410593220986745e-35, -7.758262715427098e-36, 2.3274788146281293e-36, 5.348499296241408e-37, 5.571843222897643e-36, 9.545014128676975e-36, -3.479463278433971e-36, -9.07481638834806e-36, 1.4576129950196365e-36, 2.527312854267918e-37, -9.697828394283872e-37, 1.4199971757933233e-35, -1.1707923734189984e-35, 1.2342690683634019e-36, -1.2989212576586277e-36, 3.832111583680657e-36, -2.6918820633830384e-36, 2.865267480129326e-37, 5.995021189193666e-36, 3.5970127135162e-36, -6.300649720407461e-36, 1.0767528253532154e-35, -1.664500000764359e-35, 6.770847460736376e-36, -3.790969281401877e-37, 3.738072035614874e-36, 1.189600283032155e-35, -1.7303276844104072e-35, -4.607937855223367e-36, 1.909002825735395e-35, 5.64237288394698e-36, 6.535748590571919e-36, 
1.0461899722318359e-36, 3.103305086170839e-36, -7.335084749131074e-36, -9.4039548065783e-36, -1.0579449157400588e-35, 9.874152546907215e-36, 1.4870003537901937e-36, -3.92615113174644e-36, -1.3259576277275403e-35, 1.974830509381443e-36, -2.997510594596833e-36, -2.456783193218581e-36, -1.6550960459577808e-35, -5.524823448864751e-36, -2.6918820633830384e-36, 6.676807912670593e-36, -1.2695338988880705e-35, 1.0814548027565045e-36, 6.112570624275895e-36, -4.7489971773220415e-36, -2.456783193218581e-36, 6.159590398308787e-36, -1.3541694921472752e-35, -4.2787994369931265e-36, 7.805282489459989e-36, 6.112570624275895e-36, 9.874152546907215e-36, -3.92615113174644e-36, -1.7491355940235638e-35, -6.065550850243004e-36, -1.4482090402130582e-35, -1.4223481644949679e-36, 5.5953531099140885e-36, -8.510579099953362e-36, 1.1801963282255767e-35, 7.581938562803754e-37, -1.2977457633078054e-35, 2.7624117244323756e-36, -1.4670169498262148e-35, -1.2460240118716248e-36, -2.5978425153172554e-36, 3.6205226005326455e-36, 4.966463632224165e-37, -3.549992939483308e-36, 1.3353615825341186e-35, 5.995021189193666e-36, 1.034435028723613e-35, 9.309915258512517e-36, 4.70197740328915e-36, -4.255289549976681e-36, -1.0062231643038781e-35, -1.357695975199742e-36, 1.2695338988880705e-35, 8.886737292216494e-36, -8.839717518183602e-36, 1.2601299440814922e-35, -6.347669494440353e-36, 2.915225990039273e-36, -7.93458686805044e-37, -3.408933617384634e-36, 6.25362994637457e-36, -1.3259576277275403e-35, 1.4870003537901937e-36, -4.8430367253878245e-36, 1.3635734469538535e-35, -6.817867234769268e-36, -9.827132772874324e-36, 3.832111583680657e-36, -6.112570624275895e-36, 3.079795199154393e-36, 7.005946330900834e-36, 7.617203393328423e-36, -6.065550850243004e-36, -2.3862535321692436e-36, 5.5013135618483055e-36, -5.383764126766077e-36, -1.96307556587322e-36, 5.289724578700294e-36, 1.43880508540648e-35, -1.3077374652897949e-37, -1.6603857705364811e-37, 2.0923799444636718e-36, -2.2005254247393222e-35, -2.621352402333701e-36, 6.676807912670593e-36, -6.723827686703485e-36, 4.70197740328915e-36, -1.189600283032155e-35, -4.6549576292562585e-36, -3.6205226005326455e-36, 2.2804590405952378e-36, 6.25362994637457e-36, -1.3259576277275403e-35, -6.817867234769268e-36, 8.745677970117819e-36, 1.774996469741654e-36, 9.356935032545409e-36, -1.0062231643038781e-35, 5.148665256601619e-36, 1.1990042378387333e-35, -4.3728389850589095e-36, 5.3132344657167395e-36, 5.252990380237097e-38, -6.065550850243004e-36, 4.208269775943789e-36, 1.7021158199906723e-35, -4.53740819417403e-36, -4.984096047486499e-36, 1.288341808501227e-35, 1.786751413249877e-36, 1.4199971757933233e-35, -7.993361585591555e-36, 1.598672317118311e-35, 1.0767528253532154e-35, -2.8682062160063815e-36, -1.692711865184094e-36, -1.3541694921472752e-35, 4.86654661240427e-36, -4.53740819417403e-36, -1.3259576277275403e-35, -8.79269774415071e-36, -1.2107591813469561e-36, 9.592033902709866e-36, 5.3132344657167395e-36, 1.2037062152420224e-35, -3.76158192263132e-36, -2.245194210070569e-36, 2.3603926564511533e-35, 1.0485409609334805e-35, 7.993361585591555e-36, -7.617203393328423e-36, -3.973170905779332e-36, 5.853961867094992e-36, 8.228460455756013e-36, 3.8086016966642115e-36, 2.3274788146281293e-36, 1.1190706219828177e-35, 5.995021189193666e-36, -1.095560734966372e-35, -5.348499296241408e-37, 6.723827686703485e-36, -1.0626468931433479e-35, 9.497994354644083e-36, 5.948001415160775e-36, 7.85230226349288e-36, -5.242704804667402e-36, 3.9966807927957775e-36, -6.018531076210112e-36, 7.147005652999508e-36, 
-2.997510594596833e-36, -6.91190678283505e-36, -8.745677970117819e-36, 3.5970127135162e-36, -4.325819211026018e-36, -6.629788138637702e-36, -1.88079096131566e-35, 1.1707923734189984e-35, -5.524823448864751e-36, -1.9983403963978888e-36, 1.222514124855179e-35, -1.0485409609334805e-35, -3.314894069318851e-36, -1.1990042378387333e-35, 9.686073450775649e-36, 9.262895484479626e-36, -2.3980084756774665e-36, 4.4668785331246925e-36, 5.64237288394698e-36, 6.958926556867942e-36, -6.25362994637457e-36, 3.3384039563352965e-36, -1.189600283032155e-35, -8.275480229788904e-36, -1.680956921675871e-36, -1.7021158199906723e-35, 1.316553672920962e-35, -3.5970127135162e-36, 5.612985525176423e-37, -8.46355932592047e-36, -3.644032487549091e-36, -1.1143686445795286e-35, 3.361913843351742e-36, -2.4920480237432495e-36, 3.7145621485984285e-36, 3.173834747220176e-36, -8.839717518183602e-36, 4.984096047486499e-36, 1.0579449157400588e-35, 6.347669494440353e-36, -8.369519777854687e-36, -4.725487290305596e-36, 1.8619830517025034e-35, -3.949661018762886e-36, -1.3259576277275403e-35, 4.796016951354933e-36, -7.711242941394206e-36, 2.747718045047097e-37, 1.1402295202976189e-36, 1.96307556587322e-36, 2.71833068627654e-37, -1.0579449157400588e-35, -1.2930437859045163e-36, -5.0311158215193905e-36, 1.1931267660846218e-36, -1.222514124855179e-35, 7.946341811558664e-36, 4.86654661240427e-36, -9.827132772874324e-36, 9.545014128676975e-36, -5.5013135618483055e-36, 3.6205226005326455e-36, -9.827132772874324e-36, 1.0203290965137456e-35, -1.626884181538046e-35, -8.369519777854687e-36, -2.644862289350147e-36, -8.557598873986253e-36, -9.686073450775649e-36, 4.255289549976681e-36, -1.570460452698576e-35, -3.96729343402522e-37, 9.051306501331614e-37, 7.993361585591555e-36, -1.034435028723613e-35, 2.245194210070569e-36, -1.0461899722318359e-36, -1.0485409609334805e-35, -6.770847460736376e-36, -5.242704804667402e-36, 3.636685647856452e-38, -6.535748590571919e-36, -1.6574470346594254e-36, -2.7036370068912613e-37, -5.5953531099140885e-36, 1.344765537340697e-35, 2.0124463286077562e-35, -1.1190706219828177e-35, 2.915225990039273e-36, -1.3283086164291849e-36, 1.8431751420893468e-35, -1.476420904632793e-35, -9.686073450775649e-36, -4.419858759091801e-36, -1.3459410316915192e-36, -1.1472824864025526e-35, 1.2131101700486007e-35, 7.85230226349288e-36, 1.1990042378387333e-36, -7.241045201065291e-36, 6.347669494440353e-36, -5.360254239749631e-36, 8.745677970117819e-36, -7.52316384526264e-36, -6.58276836460481e-36, -1.189600283032155e-35, -2.3744985886610208e-36, -9.592033902709866e-36, -2.268704097087015e-36, -2.621352402333701e-36, 9.827132772874324e-36, 7.052966104933725e-36, 3.314894069318851e-36, 1.4294011305999016e-35, 7.570183619295532e-36, 6.817867234769268e-36, 6.817867234769268e-36, 1.8901949161222383e-35, -4.2787994369931265e-36, -1.0109251417071673e-35, -9.686073450775649e-36, -1.3259576277275403e-35, 1.0391370061269022e-35, -6.676807912670593e-36, -9.497994354644083e-36, -8.510579099953362e-36, -1.0203290965137456e-35, -1.3106762011668506e-36, 7.099985878966617e-36, 2.73890183741593e-36, 1.598672317118311e-36, 1.263656427133959e-36, -1.586917373610088e-36, -1.7115197747972506e-35, 4.319941739271907e-37, -1.692711865184094e-36, -1.9043008483321058e-36, -2.621352402333701e-36, 6.171345341817009e-37, -1.7279766957087626e-36, -4.5609180811904755e-36, 9.639053676742758e-37, 1.2037062152420224e-35, -5.195685030634511e-36, -2.1981744360376776e-36, 4.913566386437162e-36, -2.2099293795459005e-36, -3.173834747220176e-36, 4.2787994369931265e-36, 
1.3635734469538535e-35, 1.0579449157400588e-35, 1.2319180796617573e-35, -2.245194210070569e-36, 1.504632769052528e-35, 2.5978425153172554e-36, -4.796016951354933e-36, 5.524823448864751e-36, 6.488728816539027e-36, 1.0109251417071673e-35, -1.1707923734189984e-35, -1.9654265545748647e-35, 7.52316384526264e-36, -1.8713870065090817e-35, 1.1402295202976189e-36, 9.844765188136658e-38, 4.8430367253878245e-36, 9.686073450775649e-36, -1.2977457633078054e-35, 1.3353615825341186e-35, -1.2131101700486007e-35, 6.488728816539027e-36, -2.8446963289899358e-36, 2.8446963289899358e-36, -1.4294011305999016e-35, 7.382104523163966e-36, -9.497994354644083e-36, 1.1660903960157092e-35, -4.678467516272704e-36, 9.07481638834806e-36, -8.322500003821796e-36, 9.356935032545409e-36, -2.997510594596833e-36, 1.3917853113735884e-35, 1.3259576277275403e-35, 2.527312854267918e-36, 4.86654661240427e-36, 5.64237288394698e-36, -8.839717518183602e-36, 1.9395656788567744e-37, -1.1637394073140646e-36, 6.347669494440353e-36, 7.899322037525772e-36, -1.6080762719248893e-35, 4.760752120830264e-37, -4.772507064338487e-36, 8.087401133657338e-36, 1.189600283032155e-35, 6.535748590571919e-36, 1.43880508540648e-35, -9.686073450775649e-36, 1.2037062152420224e-35, 7.805282489459989e-36, 3.009265538105056e-36, 1.3867159919856673e-38, 3.738072035614874e-36, 2.339233758136352e-36, -8.839717518183602e-36, -1.1190706219828177e-35, -1.6550960459577808e-35, 9.07481638834806e-36, 6.629788138637702e-36, 8.698658196084928e-36, 6.488728816539027e-36, -5.524823448864751e-36, 3.855621470697103e-36, 8.46355932592047e-36, -2.8446963289899358e-36, 3.009265538105056e-36, -7.52316384526264e-36, 3.471381754772068e-38, -3.973170905779332e-36, -7.946341811558664e-36, -5.113400426076951e-37, 1.2871663141504048e-36, -2.915225990039273e-36, -2.7506567809241528e-36, -4.8430367253878245e-36, 8.557598873986253e-36, -3.385423730368188e-36, 1.2695338988880705e-35, -3.8086016966642115e-36, -1.1225971050352846e-36, -2.433273306202135e-36, -2.1393997184965633e-36, -1.0109251417071673e-35, -1.1190706219828177e-35, -3.3384039563352965e-36, -2.4215183626939123e-36, -4.255289549976681e-36, 7.099985878966617e-36, 3.5970127135162e-36, 8.886737292216494e-36, -6.065550850243004e-36, -4.984096047486499e-36, 8.632536638851174e-38, 2.0100953399061116e-36, 9.07481638834806e-36, -4.114230227878006e-36, -3.220854521253068e-36, 2.5860875718090325e-36, 5.319111937470851e-37, 3.032775425121502e-36, 7.617203393328423e-36, 7.617203393328423e-36, 6.347669494440353e-36, -8.510579099953362e-36, 5.830451980078546e-36, -4.9370762734536075e-37, 5.900981641127883e-36, 6.065550850243004e-36, -5.583598166405866e-37, 6.864887008802159e-36, -9.356935032545409e-36, 6.958926556867942e-36, -2.268704097087015e-36, 1.1472824864025526e-35, 3.644032487549091e-36, 1.9513206223649973e-36, -7.93458686805044e-37, 8.557598873986253e-36, -1.4199971757933233e-35, -3.832111583680657e-36, 4.396348872075355e-36, -1.692711865184094e-35, -2.8917161030228273e-36, 1.946618644961708e-35, 1.2131101700486007e-35, 5.736412432012763e-36, -7.946341811558664e-36, 8.322500003821796e-36, 5.430783900798968e-36, -1.1461069920517303e-36, -5.289724578700294e-36, 3.361913843351742e-36, 1.3259576277275403e-35, -6.300649720407461e-36, 6.817867234769268e-36, -3.502973165450417e-36, 4.1847598889273435e-36, -2.82118644197349e-36, 9.968192094972998e-36, -1.0485409609334805e-35, 1.570460452698576e-35, 1.3259576277275403e-35, -1.0626468931433479e-35, -1.6362881363446242e-35, -1.4952288142459497e-35, 1.892545904823883e-36, 
-1.0297330513203239e-35, 6.723827686703485e-36, -1.598672317118311e-35, 7.85230226349288e-36, -8.816207631167156e-38, 2.0424214345537245e-37, -1.1002627123696611e-35, -6.906029311080939e-37, 9.697828394283872e-37, 4.2787994369931265e-36, 4.020190679812223e-36, 1.0403125004777244e-36, 3.361913843351742e-36, 7.335084749131074e-36, 5.571843222897643e-36, 1.5140367238591063e-35, 1.0297330513203239e-35, 3.7145621485984285e-36, 1.0626468931433479e-35, -9.592033902709866e-36, -1.6080762719248893e-35, -8.510579099953362e-36, -2.5978425153172554e-36, 1.0297330513203239e-35, 7.335084749131074e-36, 4.114230227878006e-36, -6.629788138637702e-36, -4.9370762734536075e-36, 9.07481638834806e-36, 3.92615113174644e-36, 1.9184067805419732e-35, 5.101645482568728e-36, -3.879131357713549e-36, 4.2787994369931265e-36, 2.915225990039273e-36, 7.85230226349288e-36, -3.220854521253068e-36, -1.1660903960157092e-35, -9.968192094972998e-36, 3.5970127135162e-36, -1.7115197747972506e-35, -6.300649720407461e-36, 3.122406869371701e-39, 7.817037432968212e-37, -6.441709042506136e-36, 8.651638422052036e-36, 1.189600283032155e-35, 7.005946330900834e-36, 5.101645482568728e-36, 4.043700566828669e-36, -6.25362994637457e-36, 2.0100953399061116e-36, 2.550822741284364e-36, 5.054625708535836e-36, 5.360254239749631e-36, -1.9160557918403286e-36, -2.256949153578792e-36, 3.438320976155191e-37, 4.8430367253878245e-36, 4.3728389850589095e-36, 6.018531076210112e-36, 1.128474576789396e-36, -3.314894069318851e-36, 8.816207631167156e-37, -7.946341811558664e-36, 1.4576129950196365e-35, -2.339233758136352e-36, -5.172175143618065e-36, -8.228460455756013e-36, -1.0485409609334805e-35, -5.830451980078546e-36, 6.817867234769268e-36, 1.4576129950196365e-35, -2.277520304718182e-37, -1.1343520485435074e-36, -1.6456920911512025e-35, 2.997510594596833e-36, -1.0767528253532154e-35, 2.4215183626939123e-36, -4.1847598889273435e-36, -6.065550850243004e-36, 4.026068151566335e-37, 2.1276447749883404e-36, -9.844765188136658e-38, 5.736412432012763e-36, -5.054625708535836e-36, -4.020190679812223e-36, 3.0562853121379475e-36, -2.6566172328583698e-36, -4.913566386437162e-36, 9.827132772874324e-36, 1.5234406786656846e-35, 4.231779662960235e-36, 1.680956921675871e-36, -6.629788138637702e-36, -5.995021189193666e-36, 1.5398975995771966e-36, 2.5126191748826395e-37, 1.692711865184094e-36, 3.126814973187285e-36, -8.228460455756013e-36, -4.208269775943789e-36, 3.76158192263132e-36, -4.208269775943789e-36, -4.8430367253878245e-36, -4.86654661240427e-36, -1.410593220986745e-35, 4.1847598889273435e-36, -4.8430367253878245e-36, 2.997510594596833e-36, 7.758262715427098e-36, 7.273371295712904e-38, 2.715391950399484e-36, -4.419858759091801e-36, -5.571843222897643e-36, 4.70197740328915e-36, 1.0109251417071673e-35, -1.3259576277275403e-35, -1.4199971757933233e-35, 6.347669494440353e-36, -3.103305086170839e-36, 3.92615113174644e-36, -1.1378785315959743e-35, -1.128474576789396e-35, 1.2601299440814922e-35, -1.3929608057244107e-36, -9.8153778293661e-37, -1.095560734966372e-35, -1.2342690683634019e-36, -2.2334392665623463e-36, 3.9966807927957775e-37, 2.453844457341525e-37, 5.830451980078546e-36, 2.1158898314801175e-35, 2.0923799444636718e-36, 7.946341811558664e-36, -8.46355932592047e-36, 2.080625000955449e-36, 9.309915258512517e-36, 6.629788138637702e-36, 3.314894069318851e-36, -2.7976765549570443e-36, -2.245194210070569e-36, -2.621352402333701e-36, -6.770847460736376e-36, -1.570460452698576e-35, 1.1049646897729503e-35, 1.288341808501227e-35, 3.032775425121502e-36, 6.91190678283505e-36, 
-1.0203290965137456e-35, 5.430783900798968e-36, 1.6174802267314676e-35, -1.5798644075051544e-35, 6.159590398308787e-36, 1.0720508479499262e-35, -5.64237288394698e-36, 3.291384182302405e-36, -1.6692019781676483e-36, -9.027796614315168e-36, 4.020190679812223e-36, -1.1990042378387333e-35, -9.309915258512517e-36, 9.686073450775649e-36, -1.7021158199906723e-35, -8.595802440387977e-38, 1.1143686445795286e-35, -1.0861567801597937e-35, -4.4668785331246925e-36, 1.0579449157400588e-35, 2.8446963289899358e-36, -2.7976765549570443e-36, -4.437491174354135e-37, -5.995021189193666e-36, 1.2695338988880705e-35, 1.1002627123696611e-35, -4.055455510336892e-37, 7.52316384526264e-36, -5.3132344657167395e-36, 6.394689268473244e-36, -6.112570624275895e-36, -3.549992939483308e-36, -1.3929608057244107e-36, -6.817867234769268e-36, 7.570183619295532e-36, 8.287235173297127e-37, 1.2601299440814922e-35, 2.3274788146281293e-36, 2.8446963289899358e-36, -2.5860875718090325e-36, -1.4294011305999016e-35, -8.980776840282277e-36, 3.314894069318851e-36, 8.651638422052036e-36, -4.043700566828669e-36, 1.6174802267314676e-35, 7.429124297196857e-36, 4.796016951354933e-36, 4.1847598889273435e-36, 1.5610564978919978e-35, -1.1461069920517303e-36, 1.1801963282255767e-35, 2.5155579107596953e-36, 8.698658196084928e-36, 6.553381005834253e-37, 5.172175143618065e-36, 2.0124463286077562e-35, -5.054625708535836e-36, 1.8337711872827685e-35, 2.362743645152798e-36, -5.524823448864751e-36, 7.617203393328423e-36, 1.5610564978919978e-35, 4.231779662960235e-36, -5.47780367483186e-36, -8.745677970117819e-36, -6.876641952310382e-37, 8.963144425019942e-38, -1.1801963282255767e-35, -6.112570624275895e-36, -7.493776486492083e-37, -1.0109251417071673e-35, 8.228460455756013e-36, 2.3980084756774665e-36, -2.0718087933242817e-37, -9.827132772874324e-36, 3.1503248602037305e-36, -6.676807912670593e-36, -2.2804590405952378e-36, -3.408933617384634e-36, -2.82118644197349e-36, -1.3224311446750734e-36, 8.557598873986253e-36, 1.1472824864025526e-35, 9.262895484479626e-36, -3.314894069318851e-36, -7.147005652999508e-36, 1.0861567801597937e-35, -1.3353615825341186e-35, -6.770847460736376e-36, -5.995021189193666e-36, 8.839717518183602e-36, -1.0297330513203239e-35, 1.4199971757933233e-35, 7.617203393328423e-36, -1.6362881363446242e-35, -1.7632415262334313e-36, -1.4670169498262148e-35, 3.502973165450417e-36, -6.37705685321091e-37, 1.5140367238591063e-35, 4.349329098042464e-36, -6.018531076210112e-36, -1.3635734469538535e-35, 8.79269774415071e-36, 5.965633830423109e-37, 5.84808439534088e-37, 1.3635734469538535e-35, 7.946341811558664e-36, -1.189600283032155e-35, 6.58276836460481e-36, -6.958926556867942e-36, -7.147005652999508e-36, 1.4294011305999016e-35, 4.161250001910898e-36, -1.9395656788567744e-36, -1.1707923734189984e-35, 1.1990042378387333e-36, 6.629788138637702e-36, 2.915225990039273e-36, -6.25362994637457e-36, 4.725487290305596e-36, -2.080625000955449e-36, 3.667542374565537e-36, 4.231779662960235e-36, 9.121836162380951e-36, -1.3106762011668506e-36, 8.79269774415071e-36, -4.137740114894452e-36, 9.991701981989444e-38, -5.383764126766077e-36, -5.0311158215193905e-36, -1.0062231643038781e-35, -4.678467516272704e-36, 1.598672317118311e-36, -5.736412432012763e-36, -9.07481638834806e-36, 4.325819211026018e-36, 3.9026412447299945e-36, 2.1041348879718946e-36, -1.1472824864025526e-35, 7.288064975098183e-36, 2.1864194925294548e-36, -1.1801963282255767e-35, 2.82118644197349e-36, -1.6339371476429796e-36, 9.8153778293661e-37, 1.0626468931433479e-35, -9.168855936413843e-36, 
3.408933617384634e-36, 2.8917161030228273e-36, 7.052966104933725e-36, -1.6362881363446242e-35, 2.715391950399484e-36, 9.286405371496071e-37, -8.604618648019145e-36, 3.2443644082695135e-36, -6.723827686703485e-36, -7.899322037525772e-36, 4.607937855223367e-36, 1.476420904632793e-35, 6.629788138637702e-36, 5.5953531099140885e-36, -7.567244883418476e-38, 4.53740819417403e-36, -1.1566864412091309e-35, 1.0109251417071673e-35, -5.054625708535836e-36, 5.242704804667402e-36, 2.4685381367268038e-36, -1.8102613002663228e-36, 4.6549576292562585e-36, 8.698658196084928e-36, 2.3744985886610208e-36, -5.47780367483186e-36, -1.7279766957087626e-36, -3.785091809647766e-36, -6.488728816539027e-36, -1.1801963282255767e-35, -9.91823358506305e-38, 2.7506567809241528e-36, 7.335084749131074e-36, 9.262895484479626e-36, -1.0062231643038781e-35, -6.488728816539027e-36, 1.6550960459577808e-35, 4.1847598889273435e-36, -6.318282135669795e-37, -4.5609180811904755e-36, -8.510579099953362e-36, -1.410593220986745e-36, -3.0562853121379475e-36, 5.148665256601619e-36, -4.775445800215543e-38, -5.6893926579798715e-36, -1.410593220986745e-35, 1.3283086164291849e-36, -3.502973165450417e-36, -1.288341808501227e-35, 1.1002627123696611e-35, -4.443368646108247e-36, -1.704466808692317e-36, 1.4576129950196365e-35, -2.3862535321692436e-36, -2.1535056507064307e-35, 4.631447742239813e-36, -2.433273306202135e-36, -3.785091809647766e-36, 9.827132772874324e-36, -1.504632769052528e-36, -6.159590398308787e-36, 9.4039548065783e-36, -2.3321807920314184e-35, 3.9966807927957775e-36, 2.5860875718090325e-36, 1.0109251417071673e-35, 1.3635734469538535e-35, 1.4670169498262148e-35, 1.5610564978919978e-35, -9.545014128676975e-36, -6.206610172341678e-36, -9.309915258512517e-36, -9.07481638834806e-36, 4.173004945419121e-37, 1.7397316392169855e-36, 2.268704097087015e-36, -2.7976765549570443e-36, -1.1872492943305104e-36, -4.984096047486499e-36, -5.383764126766077e-36, 5.853961867094992e-36, -7.288064975098183e-36, -2.4920480237432495e-36, 9.356935032545409e-36, 4.490388420141138e-36, -7.099985878966617e-36, 1.5610564978919978e-35, -9.07481638834806e-36, -4.678467516272704e-36, 1.0203290965137456e-35, -9.262895484479626e-36, 1.598672317118311e-36, -3.691052261581983e-36, -7.993361585591555e-36, 7.476144071229749e-36, 9.592033902709866e-36, 7.382104523163966e-36, 3.92615113174644e-36, -4.3728389850589095e-36, -5.3132344657167395e-36, -8.639883478543813e-37, 1.0990872180188388e-36, -7.147005652999508e-36, -6.065550850243004e-36, 4.725487290305596e-36, -1.9395656788567744e-36, -6.91190678283505e-36, -4.86654661240427e-36, -8.557598873986253e-36, -4.137740114894452e-36, 5.3132344657167395e-36, -3.92615113174644e-36, 1.0520674439859473e-36, 1.0203290965137456e-35, 2.0923799444636718e-36, 1.626884181538046e-35, 9.827132772874324e-36, 6.906029311080939e-37, -9.93292726444833e-37, 2.2922139841034606e-36, 2.7624117244323756e-36, -5.289724578700294e-36, 3.267874295285959e-36, 9.521504241660529e-37, 3.173834747220176e-36, 2.0594661026406477e-35, -5.3132344657167395e-36, 2.644862289350147e-36, -2.6918820633830384e-36, -5.6893926579798715e-36, 6.817867234769268e-36, -2.8446963289899358e-36, -4.86654661240427e-36, 1.263656427133959e-36, -6.817867234769268e-36, 1.0203290965137456e-35, 1.0767528253532154e-35, 1.2107591813469561e-36, 7.229290257557068e-37, -4.584427968206921e-36, 9.07481638834806e-36, 8.228460455756013e-36, 7.2880649750981825e-37, 4.513898307157584e-36, 7.52316384526264e-37, -2.915225990039273e-36, 1.786751413249877e-36, -3.5970127135162e-36, 
-6.770847460736376e-36, 1.7115197747972506e-35, -1.9983403963978888e-36, 2.433273306202135e-36, 9.73309322480854e-36, -1.1707923734189984e-35, -6.535748590571919e-36, 9.286405371496071e-37, 4.796016951354933e-36, 7.617203393328423e-36, -2.256949153578792e-36, -5.5013135618483055e-36, -7.052966104933725e-36, -2.0923799444636718e-36, -7.335084749131074e-36, -1.664500000764359e-35, 5.6893926579798715e-36, -9.697828394283872e-37, -1.96307556587322e-36, 2.4215183626939123e-36, 5.571843222897643e-36, -4.8430367253878245e-36, -1.1660903960157092e-35, 5.900981641127883e-36, -3.549992939483308e-36, -2.0100953399061116e-36, -1.5516525430854195e-35, -5.7834322060456545e-36, -1.6339371476429796e-36, -8.933757066249385e-36, 1.570460452698576e-35, -4.772507064338487e-36, 1.4199971757933233e-35, 9.168855936413843e-36, 2.7976765549570443e-36, -3.502973165450417e-36, -1.0285575569695016e-36, -5.7834322060456545e-36, -3.408933617384634e-36, 3.667542374565537e-36, 9.545014128676975e-36, 6.91190678283505e-36, 7.664223167361315e-36, -1.4752454102819708e-36, 1.4458580515114136e-36, 3.849743998942992e-37, -2.527312854267918e-36, 4.86654661240427e-36, 7.758262715427098e-36, -1.6574470346594254e-36, 5.383764126766077e-36, 1.3071497181143837e-35, -1.1660903960157092e-35, -7.617203393328423e-36, -4.984096047486499e-36, 5.054625708535836e-37, -6.876641952310382e-37, -9.356935032545409e-36, -1.275411370642182e-36, 7.85230226349288e-36, 4.2787994369931265e-36, 6.612155723375367e-37, -1.316553672920962e-35, -1.2319180796617573e-35, -3.267874295285959e-36, 2.926980933547496e-36, 1.2695338988880705e-35, 5.289724578700294e-36, -1.3541694921472752e-35, 1.2695338988880705e-35, 1.7491355940235638e-35, 4.319941739271907e-37, -2.456783193218581e-36, -1.0391370061269022e-35, -1.88079096131566e-36, -7.899322037525772e-36, 9.545014128676975e-36, 5.995021189193666e-36, 2.9828169152115546e-37, -1.986585452889666e-36, 3.5970127135162e-36, 9.592033902709866e-36, -9.215875710446734e-36, -5.172175143618065e-36, 4.513898307157584e-36, -9.592033902709866e-36, 2.5126191748826395e-37, 2.997510594596833e-36, 1.2695338988880705e-35, -1.2977457633078054e-35, -8.557598873986253e-36, 1.0638223874941702e-36, 1.4011892661801667e-35, 4.6549576292562585e-36, 9.07481638834806e-36, 4.643202685748036e-37, -5.5953531099140885e-36, -2.0218502834143345e-36, 3.855621470697103e-36, -1.288341808501227e-35, 6.441709042506136e-36, 2.5743326283008096e-36, -1.2166366531010676e-36, 7.993361585591555e-36, -1.4011892661801667e-35, 5.64237288394698e-36, -5.995021189193666e-36, -6.770847460736376e-36, -7.335084749131074e-36, -2.3980084756774665e-36, -3.009265538105056e-36, 9.4039548065783e-36, 9.686073450775649e-36, 1.4870003537901937e-36, 9.521504241660529e-37, -3.197344634236622e-36, 4.9370762734536075e-36, -1.2601299440814922e-35, -1.0626468931433479e-35, -4.231779662960235e-36, -7.899322037525772e-36, 1.2166366531010676e-36, -1.3459410316915192e-36, -7.85230226349288e-36, 9.545014128676975e-36, 7.1940254270324e-36, 1.0062231643038781e-35, -5.948001415160775e-36, -4.349329098042464e-36, 8.745677970117819e-36, -2.057115113939003e-36, 1.344765537340697e-35, 1.504632769052528e-36, 1.369450918707965e-36, 3.855621470697103e-36, 3.197344634236622e-36, 8.287235173297127e-37, 7.640713280344869e-37, 7.005946330900834e-36, -8.040381359624447e-36, 3.197344634236622e-36, -5.5953531099140885e-36, 2.7859216114488214e-36, 5.47780367483186e-36, -4.4668785331246925e-37, 4.5609180811904755e-36, 3.103305086170839e-36, 2.256949153578792e-36, -5.64237288394698e-36, 
9.73309322480854e-36, 6.629788138637702e-36, -1.814963277669612e-35, 4.5609180811904755e-36, -6.465218929522581e-38, 5.524823448864751e-36, 3.549992939483308e-36, -5.995021189193666e-36, 1.2166366531010676e-36, 5.383764126766077e-36, 2.527312854267918e-37, -2.915225990039273e-36, 1.504632769052528e-35, 1.6692019781676483e-36, -2.915225990039273e-36, -1.222514124855179e-35, -5.64237288394698e-36, 7.993361585591555e-36, 2.3039689276116835e-36, 1.96307556587322e-36, 2.456783193218581e-36, 2.926980933547496e-36, 7.52316384526264e-36, 2.915225990039273e-36, -6.488728816539027e-36, -6.535748590571919e-36, 5.571843222897643e-36, 1.2131101700486007e-35, 2.4685381367268038e-36, 3.385423730368188e-36, -1.5516525430854195e-35, -8.79269774415071e-36, -3.573502826499754e-36, -1.3729774017604318e-35, 9.309915258512517e-36, 1.4952288142459497e-35, 6.318282135669795e-38, -1.2131101700486007e-35, -1.0720508479499262e-35, 8.369519777854687e-36, 1.3635734469538535e-35, -1.0861567801597937e-35, 1.504632769052528e-35, -5.64237288394698e-36, 7.147005652999508e-36, 3.92615113174644e-36, -1.586917373610088e-36, 1.250725989274914e-35, 5.172175143618065e-36, -3.032775425121502e-36, -3.8086016966642115e-36, -3.032775425121502e-36, 5.430783900798968e-36, 9.168855936413843e-36, 2.5155579107596953e-36, 4.2787994369931265e-36, -1.4011892661801667e-35, 1.598672317118311e-36, -4.5609180811904755e-36, -1.0720508479499262e-35, -2.6918820633830384e-36, -5.195685030634511e-36, 1.7021158199906723e-35, 1.189600283032155e-35, 1.128474576789396e-35, -6.676807912670593e-36, 1.288341808501227e-35, -6.994191387392611e-37, 1.0062231643038781e-35, 8.181440681723121e-36, -6.723827686703485e-36, -2.82118644197349e-36, -6.91190678283505e-36, -1.4294011305999016e-35, -1.814963277669612e-35, 3.6440324875490913e-37, 3.126814973187285e-36, 9.8153778293661e-37, 2.3069076634887392e-37, 9.497994354644083e-36, -5.830451980078546e-36, -5.363192975626687e-38, -1.1660903960157092e-35, -6.018531076210112e-36, -7.617203393328423e-36, 2.5743326283008096e-36, 9.592033902709866e-36, 2.4685381367268038e-36, -9.262895484479626e-36, -9.262895484479626e-36, -1.4482090402130582e-35, 8.980776840282277e-36, -3.785091809647766e-36, 1.3259576277275403e-35, -1.4752454102819708e-36, 3.173834747220176e-36, -2.1276447749883404e-36, 9.874152546907215e-36, 3.314894069318851e-36, -6.347669494440353e-36, 5.853961867094992e-36, -6.535748590571919e-36, -8.485599844998388e-38, 7.241045201065291e-36, -5.830451980078546e-36, -1.1990042378387333e-35, -1.3071497181143837e-35, -9.309915258512517e-36, 9.521504241660529e-37, 1.7632415262334313e-36, -5.6893926579798715e-36, 3.079795199154393e-36, -8.369519777854687e-36, 1.0485409609334805e-35, 4.53740819417403e-36, 1.0062231643038781e-35, 6.864887008802159e-36, 7.335084749131074e-36, 8.181440681723121e-36, 6.58276836460481e-36, 3.4530146555404696e-37, -1.4294011305999016e-35, 1.1990042378387333e-35, 1.0626468931433479e-35, -7.147005652999508e-36, -6.018531076210112e-36, 1.0485409609334805e-35, -1.3812058622161878e-36, 3.4324435044010795e-36, -5.383764126766077e-36, -4.302309324009572e-36, -6.065550850243004e-36, 6.723827686703485e-36, 9.4039548065783e-36, -4.2787994369931265e-36, 1.2977457633078054e-35, -5.195685030634511e-36, 5.172175143618065e-37, 5.0311158215193905e-36, -4.678467516272704e-36, -9.827132772874324e-36, 1.6080762719248893e-35, -9.07481638834806e-36, 2.5155579107596953e-36, 1.7491355940235638e-35, 1.095560734966372e-35, -6.817867234769268e-36, -1.3988382774785221e-36, 7.899322037525772e-36, -4.2787994369931265e-36, 
5.6893926579798715e-36, 6.91190678283505e-36, -4.0907203408615605e-36, 1.1002627123696611e-35, 1.0720508479499262e-35, -1.0932097462647274e-36, 1.0720508479499262e-35, 2.256949153578792e-36, 6.58276836460481e-36, -1.2131101700486007e-35, 6.112570624275895e-36, 3.6205226005326455e-36, 1.1707923734189984e-35, 6.159590398308787e-36, 7.147005652999508e-36, -4.984096047486499e-36, 8.698658196084928e-36, -7.493776486492083e-37, 5.360254239749631e-36, 1.6739039555709374e-35, 4.9370762734536075e-36, 6.553381005834253e-37, 9.168855936413843e-37, -8.040381359624447e-36, -2.5743326283008096e-36, 6.347669494440353e-36, -2.3862535321692436e-36, -3.879131357713549e-36, 8.416539551887579e-36, 3.408933617384634e-36, -1.1472824864025526e-35, -4.678467516272704e-36, -1.0297330513203239e-35, 2.938735877055719e-38, -5.172175143618065e-36, -1.5140367238591063e-35, -7.099985878966617e-36, -9.991701981989444e-37, -6.864887008802159e-36, 1.3738590225235485e-37, -9.592033902709866e-36, 6.394689268473244e-36, -1.4670169498262148e-35, 4.913566386437162e-36, -9.686073450775649e-36, -1.1225971050352846e-36, 1.8220162437745456e-36, 9.07481638834806e-36, -1.5516525430854195e-35, -5.995021189193666e-36, -3.291384182302405e-36, 4.796016951354933e-36, 1.4576129950196365e-36, 1.8055593228630336e-35, -1.8337711872827685e-35, 1.2789378536946488e-35, 9.592033902709866e-36, 6.347669494440353e-36, -7.335084749131074e-36, -7.4350017689509685e-37, 9.309915258512517e-36, 2.5978425153172554e-36, 1.7115197747972506e-35, 6.300649720407461e-36, 3.361913843351742e-36, 6.629788138637702e-36, -2.068870057447226e-36, -4.86654661240427e-36, 4.731364762059707e-37, 1.1049646897729503e-35, -8.698658196084928e-36, -4.86654661240427e-36, -6.58276836460481e-36, 9.051306501331614e-37, -1.0861567801597937e-35, -1.0062231643038781e-35, 7.241045201065291e-36, -1.2319180796617573e-35, 3.92615113174644e-36, 4.3728389850589095e-36, -1.4576129950196365e-36, -1.4576129950196365e-35, 1.6530389308438418e-38, 2.5860875718090325e-36, 2.3744985886610208e-36, -1.0109251417071673e-35, 7.85230226349288e-36, -1.4576129950196365e-36, -7.85230226349288e-36, 1.2131101700486007e-35, 9.874152546907215e-36, 3.032775425121502e-36, -1.986585452889666e-36, 1.2460240118716248e-36, 1.6574470346594254e-36, -1.4670169498262148e-35, 3.314894069318851e-36, -3.4824020143110267e-37, -1.222514124855179e-35, -1.9043008483321058e-36, -3.314894069318851e-36, -9.73309322480854e-36, 1.369450918707965e-36, 4.86654661240427e-36, -2.0453601704307803e-36, -1.9043008483321058e-36, 1.2977457633078054e-35, 8.275480229788904e-36, 7.052966104933725e-36, 9.345180089037186e-37, 1.2131101700486007e-35, 6.906029311080939e-37, 1.6456920911512025e-35, -1.3259576277275403e-35, -1.4670169498262148e-35, -1.7279766957087626e-36, 2.350988701644575e-36, 2.4685381367268038e-36, 5.6893926579798715e-36, 3.8086016966642115e-36, 7.099985878966617e-36, 2.4685381367268038e-36, 5.853961867094992e-36, 4.8430367253878245e-36, -7.241045201065291e-36, 4.678467516272704e-36, 1.3635734469538535e-35, 5.360254239749631e-36, 1.692711865184094e-35, 7.85230226349288e-36, -2.068870057447226e-36, -1.1167196332811731e-36, 4.396348872075355e-36, -6.723827686703485e-36, -4.255289549976681e-36, 3.92615113174644e-36, 7.099985878966617e-36, 6.206610172341678e-36, -2.068870057447226e-35, -4.419858759091801e-36, -2.6889433275059827e-37, 1.2695338988880705e-35, -2.527312854267918e-36, 9.874152546907215e-36, 2.204051907791789e-37, 8.933757066249385e-36, -7.147005652999508e-36, -1.6456920911512025e-36, 1.1225971050352846e-36, 
-7.4350017689509685e-37, -3.502973165450417e-36, -3.667542374565537e-36, -4.7489971773220415e-36, -7.335084749131074e-36, -1.0579449157400588e-35, 7.099985878966617e-36, -1.786751413249877e-35, -4.913566386437162e-36, -8.228460455756013e-36, -1.2166366531010676e-36, -1.3259576277275403e-35, 3.785091809647766e-36, 4.419858759091801e-36, 1.0861567801597937e-35, 9.874152546907215e-36, 1.1990042378387333e-36, -1.1378785315959743e-35, -1.5798644075051544e-35, -1.774996469741654e-36, 6.629788138637702e-36, 3.173834747220176e-36, -1.410593220986745e-36, 3.4530146555404696e-37, -1.2695338988880705e-35, -3.738072035614874e-36, 8.651638422052036e-36, 8.087401133657338e-36, -1.0861567801597937e-35, 2.1535056507064307e-35, -1.986585452889666e-36, 3.197344634236622e-36, 2.1599708696359533e-37, -3.2473031441465692e-37, 3.173834747220176e-36, 1.96307556587322e-36, -4.3728389850589095e-36, -3.291384182302405e-36, 7.617203393328423e-36, 2.0594661026406477e-35, -1.1990042378387333e-35, -5.242704804667402e-36, 1.8337711872827685e-36, -8.087401133657338e-36, -2.256949153578792e-36, -3.92615113174644e-36, 4.161250001910898e-36, 6.112570624275895e-36, -9.309915258512517e-36, -8.181440681723121e-36, 7.617203393328423e-36, -1.3259576277275403e-35, 3.408933617384634e-36, 9.262895484479626e-36, -3.408933617384634e-36, 8.322500003821796e-36, -3.573502826499754e-36, -4.984096047486499e-36, -3.614645128778534e-37, 1.0626468931433479e-35, 1.1566864412091309e-35, -7.570183619295532e-36, 1.275411370642182e-36, 3.6205226005326455e-36, 1.476420904632793e-35, 5.5013135618483055e-36, 2.456783193218581e-36, 9.968192094972998e-36, 8.839717518183602e-36, -9.686073450775649e-36, 3.738072035614874e-36, 3.3384039563352965e-36, -1.8619830517025034e-35, -3.691052261581983e-36, 6.629788138637702e-36, 1.8220162437745456e-36, 1.2789378536946488e-35, -4.2787994369931265e-36, 3.973170905779332e-36, 1.0109251417071673e-35, 3.7145621485984285e-36, 6.958926556867942e-36, -1.692711865184094e-35, -7.85230226349288e-36, -8.228460455756013e-36, 1.7985063567581e-36, 9.4039548065783e-36, -1.0579449157400588e-36, -3.879131357713549e-36, -2.433273306202135e-36, -8.698658196084928e-36, 1.1049646897729503e-35, 1.516387712560751e-36, -6.629788138637702e-36, -6.629788138637702e-36, -4.913566386437162e-36, 8.839717518183602e-36, 1.2413220344683356e-35, -3.197344634236622e-36, -5.524823448864751e-36, -5.830451980078546e-36, 2.0923799444636718e-36, -4.9370762734536075e-36, -7.993361585591555e-36, -5.948001415160775e-36]) + if inputs_embeds is not None: hidden_states = inputs_embeds else: hidden_states = self.get_input_embeddings(input_ids) + residual = None for i in range(len(self.layers)): layer = self.layers[i] @@ -290,7 +293,9 @@ def forward( attn_metadata, residual, ) + print(f"idx: {i}: {hidden_states}") hidden_states, _ = self.norm(hidden_states, residual) + print(hidden_states) return hidden_states From 88ba83b733f7ccfee66a18e591abc2ec40db3a7e Mon Sep 17 00:00:00 2001 From: Robert Shaw Date: Wed, 24 Apr 2024 02:03:09 +0000 Subject: [PATCH 07/90] added static fp8 --- run_fp8.py | 5 + .../layers/quantization/fp8_static.py | 218 ++++++++++++++++++ vllm/model_executor/models/llama.py | 3 +- 3 files changed, 224 insertions(+), 2 deletions(-) create mode 100644 run_fp8.py create mode 100644 vllm/model_executor/layers/quantization/fp8_static.py diff --git a/run_fp8.py b/run_fp8.py new file mode 100644 index 0000000000000..4b3833be8337e --- /dev/null +++ b/run_fp8.py @@ -0,0 +1,5 @@ +from vllm import LLM + +model = 
LLM("FriendliAI/Mistral-7B-Instruct-v0.2-fp8", quantization="fp8_static", enforce_eager=True, max_model_len=1024) +# model = LLM("mistralai/Mistral-7B-Instruct-v0.2", enforce_eager=True, max_model_len=1024) +print(model.generate("Hello my name is")) \ No newline at end of file diff --git a/vllm/model_executor/layers/quantization/fp8_static.py b/vllm/model_executor/layers/quantization/fp8_static.py new file mode 100644 index 0000000000000..1def8c0f9fcd3 --- /dev/null +++ b/vllm/model_executor/layers/quantization/fp8_static.py @@ -0,0 +1,218 @@ +from typing import Any, Dict, List, Optional, Tuple, Union + +import torch +from torch.nn.parameter import Parameter + +from vllm.model_executor.layers.linear import (LinearMethodBase, + set_weight_attrs) +from vllm.model_executor.layers.quantization.base_config import ( + QuantizationConfig) + + +class FP8StaticConfig(QuantizationConfig): + """Config class for FP8.""" + + @classmethod + def get_name(cls) -> str: + return "fp8_static" + + @classmethod + def get_supported_act_dtypes(cls) -> List[torch.dtype]: + return [torch.bfloat16, torch.half] + + @classmethod + def get_min_capability(cls) -> int: + return 89 + + @classmethod + def get_config_filenames(cls) -> List[str]: + return [] + + @classmethod + def from_config(cls, config: Dict[str, Any]) -> "FP8StaticConfig": + return cls() + + def get_linear_method(self) -> "Fp8LinearMethod": + return Fp8LinearMethod(self) + + def get_scaled_act_names(self) -> List[str]: + return [] + + +class Fp8LinearMethod(LinearMethodBase): + """Linear method for StaticFP8 + . + Args: + quant_config: The quantization config. + """ + + def __init__(self, quant_config: FP8StaticConfig): + self.quant_config = quant_config + + def create_weights( + self, + layer: torch.nn.Module, + input_size_per_partition: int, + output_partition_sizes: List[int], + input_size: int, + output_size: int, + params_dtype: torch.dtype, + **extra_weight_attrs, + ): + del input_size, output_size + + weight = Parameter(torch.empty(sum(output_partition_sizes), + input_size_per_partition, + dtype=torch.float8_e4m3fn), + requires_grad=False) + layer.register_parameter("weight", weight) + set_weight_attrs(weight, {"input_dim": 1, "output_dim": 0}) + set_weight_attrs(weight, extra_weight_attrs) + + weight_scale = Parameter( + torch.empty( + len(output_partition_sizes), + device='cuda', dtype=torch.float32, + ), requires_grad=False + ) + layer.register_parameter("weight_scale", weight_scale) + set_weight_attrs(weight_scale, extra_weight_attrs) + set_weight_attrs(weight_scale, { + "shard_indexer": self.scales_shard_indexer, + }) + + in_scale = Parameter( + torch.empty( + len(output_partition_sizes), + device='cuda', dtype=torch.float32, + ), requires_grad=False + ) + layer.register_parameter("in_scale", in_scale) + set_weight_attrs(in_scale, extra_weight_attrs) + set_weight_attrs(in_scale, { + "shard_indexer": self.scales_shard_indexer, + }) + + layer.logical_widths = output_partition_sizes + + def shard_id_as_int( + self, + shard_id: Union[str, int] + ) -> int: + if isinstance(shard_id, int): + return shard_id + assert isinstance(shard_id, str) + qkv_idxs = { "q": 0, "k": 1, "v": 2 } + assert shard_id in qkv_idxs + return qkv_idxs[shard_id] + + # def scales_shard_splitter_NKK( + # self, + # param: torch.Tensor, + # loaded_weight: torch.Tensor, + # shard_id: Union[str, int], + # logical_widths: torch.Tensor + # ) -> Tuple[torch.Tensor, torch.Tensor]: + # shard_id = self.shard_id_as_int(shard_id) + # offset = sum(logical_widths[:shard_id]) + # size = 
diff --git a/vllm/model_executor/layers/quantization/fp8_static.py b/vllm/model_executor/layers/quantization/fp8_static.py
new file mode 100644
index 0000000000000..1def8c0f9fcd3
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/fp8_static.py
@@ -0,0 +1,218 @@
+from typing import Any, Dict, List, Optional, Tuple, Union
+
+import torch
+from torch.nn.parameter import Parameter
+
+from vllm.model_executor.layers.linear import (LinearMethodBase,
+                                               set_weight_attrs)
+from vllm.model_executor.layers.quantization.base_config import (
+    QuantizationConfig)
+
+
+class FP8StaticConfig(QuantizationConfig):
+    """Config class for static FP8."""
+
+    @classmethod
+    def get_name(cls) -> str:
+        return "fp8_static"
+
+    @classmethod
+    def get_supported_act_dtypes(cls) -> List[torch.dtype]:
+        return [torch.bfloat16, torch.half]
+
+    @classmethod
+    def get_min_capability(cls) -> int:
+        return 89
+
+    @classmethod
+    def get_config_filenames(cls) -> List[str]:
+        return []
+
+    @classmethod
+    def from_config(cls, config: Dict[str, Any]) -> "FP8StaticConfig":
+        return cls()
+
+    def get_linear_method(self) -> "Fp8LinearMethod":
+        return Fp8LinearMethod(self)
+
+    def get_scaled_act_names(self) -> List[str]:
+        return []
+
+
+class Fp8LinearMethod(LinearMethodBase):
+    """Linear method for static FP8.
+
+    Args:
+        quant_config: The quantization config.
+    """
+
+    def __init__(self, quant_config: FP8StaticConfig):
+        self.quant_config = quant_config
+
+    def create_weights(
+        self,
+        layer: torch.nn.Module,
+        input_size_per_partition: int,
+        output_partition_sizes: List[int],
+        input_size: int,
+        output_size: int,
+        params_dtype: torch.dtype,
+        **extra_weight_attrs,
+    ):
+        del input_size, output_size
+
+        weight = Parameter(torch.empty(sum(output_partition_sizes),
+                                       input_size_per_partition,
+                                       dtype=torch.float8_e4m3fn),
+                           requires_grad=False)
+        layer.register_parameter("weight", weight)
+        set_weight_attrs(weight, {"input_dim": 1, "output_dim": 0})
+        set_weight_attrs(weight, extra_weight_attrs)
+
+        # One scale per logical shard (e.g. q/k/v), loaded via shard_indexer.
+        weight_scale = Parameter(
+            torch.empty(
+                len(output_partition_sizes),
+                device='cuda', dtype=torch.float32,
+            ), requires_grad=False
+        )
+        layer.register_parameter("weight_scale", weight_scale)
+        set_weight_attrs(weight_scale, extra_weight_attrs)
+        set_weight_attrs(weight_scale, {
+            "shard_indexer": self.scales_shard_indexer,
+        })
+
+        in_scale = Parameter(
+            torch.empty(
+                len(output_partition_sizes),
+                device='cuda', dtype=torch.float32,
+            ), requires_grad=False
+        )
+        layer.register_parameter("in_scale", in_scale)
+        set_weight_attrs(in_scale, extra_weight_attrs)
+        set_weight_attrs(in_scale, {
+            "shard_indexer": self.scales_shard_indexer,
+        })
+
+        layer.logical_widths = output_partition_sizes
+
+    def shard_id_as_int(
+        self,
+        shard_id: Union[str, int]
+    ) -> int:
+        if isinstance(shard_id, int):
+            return shard_id
+        assert isinstance(shard_id, str)
+        qkv_idxs = {"q": 0, "k": 1, "v": 2}
+        assert shard_id in qkv_idxs
+        return qkv_idxs[shard_id]
+
+    # def scales_shard_splitter_NKK(
+    #     self,
+    #     param: torch.Tensor,
+    #     loaded_weight: torch.Tensor,
+    #     shard_id: Union[str, int],
+    #     logical_widths: torch.Tensor
+    # ) -> Tuple[torch.Tensor, torch.Tensor]:
+    #     shard_id = self.shard_id_as_int(shard_id)
+    #     offset = sum(logical_widths[:shard_id])
+    #     size = logical_widths[shard_id]
+    #     # update loaded weight with copies for broadcast.
+    #     loaded_weight = loaded_weight.repeat(size)
+    #     return param[offset : offset + size], loaded_weight
+
+    def scales_shard_indexer(
+        self,
+        param: torch.Tensor,
+        loaded_weight: torch.Tensor,
+        shard_id: Union[str, int],
+    ) -> Tuple[torch.Tensor, torch.Tensor]:
+        # Each shard's scalar scale lands at its index in the 1D scale param.
+        return param[self.shard_id_as_int(shard_id)], loaded_weight
+
+    def apply_weights(
+        self,
+        layer: torch.nn.Module,
+        x: torch.Tensor,
+        bias: Optional[torch.Tensor] = None
+    ) -> torch.Tensor:
+        logical_widths = layer.logical_widths
+        q_weight = layer.weight
+        w_scales = layer.weight_scale
+        in_scales = layer.in_scale
+
+        # Dequantize each logical shard with its own scale and run the
+        # matmul in the activation dtype (fake quantization for now).
+        output = torch.zeros(x.shape[0], q_weight.shape[0], dtype=x.dtype, device="cuda")
+        start_offset = 0
+        for logical_width, w_scale, in_scale in zip(logical_widths, w_scales, in_scales):
+            end_offset = start_offset + logical_width
+            weight_dq = self._dequantize(q_weight[start_offset:end_offset, :], w_scale, x.dtype)
+            x_dq = self._fake_quantize_static(x, in_scale)
+            output[:, start_offset:end_offset] = torch.nn.functional.linear(x_dq, weight_dq)
+            start_offset = end_offset
+
+        assert end_offset == output.shape[1]
+        return output
+
+    def _quantize_dynamic(self, x: torch.Tensor):
+        finfo = torch.finfo(torch.float8_e4m3fn)
+        min_val, max_val = x.aminmax()
+        amax = min_val.abs().max(max_val.abs())
+        scale = finfo.max / amax.clamp(min=1e-12)
+        # Scale and clamp the tensor to bring it into the representable
+        # range of the float8 data type (the default cast is unsaturated).
+        qweight = (x * scale).clamp(min=finfo.min, max=finfo.max)
+        # Return both the float8 data and the inverse scale (as float),
+        # as both are required as inputs to torch._scaled_mm.
+        return qweight, scale.float().reciprocal()
+
+    def _quantize(self, x: torch.Tensor, inv_scale: torch.Tensor):
+        finfo = torch.finfo(torch.float8_e4m3fn)
+        return (x / inv_scale).clamp(min=finfo.min, max=finfo.max)
+
+    def _dequantize(self, xq: torch.Tensor, inv_scale: torch.Tensor, dtype: torch.dtype):
+        return xq.to(dtype) * inv_scale
+
+    def _fake_quantize_static(self, x: torch.Tensor, inv_scale: torch.Tensor):
+        # Quantize and immediately dequantize with the static input scale.
+        xq = self._quantize(x, inv_scale)
+        xdq = self._dequantize(xq, inv_scale, x.dtype)
+        return xdq
+
+
+def per_tensor_quantize(tensor: torch.Tensor) -> Tuple[torch.Tensor, float]:
+    """Quantize a tensor using a per-tensor static scaling factor.
+
+    Args:
+        tensor: The input tensor.
+    """
+    finfo = torch.finfo(torch.float8_e4m3fn)
+    # Calculate the scale as dtype max divided by absmax.
+    # Since .abs() creates a new tensor, we use aminmax to get
+    # the min and max first and then calculate the absmax.
+    min_val, max_val = tensor.aminmax()
+    amax = min_val.abs().max(max_val.abs())
+    scale = finfo.max / amax.clamp(min=1e-12)
+    # Scale and clamp the tensor to bring it into the representable
+    # range of the float8 data type (the default cast is unsaturated).
+    qweight = (tensor * scale).clamp(min=finfo.min, max=finfo.max)
+    # Return both the float8 data and the inverse scale (as float),
+    # as both are required as inputs to torch._scaled_mm.
+    qweight = qweight.to(torch.float8_e4m3fn)
+    scale = scale.float().reciprocal()
+    return qweight, scale
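The numerics above reduce to one rule: scale = finfo.max / absmax, stored as its reciprocal for dequantization. A self-contained sketch of the quantize/dequantize round trip behind apply_weights (independent of vLLM; note the activation scale is computed on the fly here for illustration, whereas the patch loads a static in_scale from the checkpoint):

import torch

def quantize_per_tensor(t: torch.Tensor):
    # scale = finfo.max / absmax; the reciprocal is kept for dequantization.
    finfo = torch.finfo(torch.float8_e4m3fn)
    min_val, max_val = t.aminmax()
    amax = min_val.abs().max(max_val.abs())
    scale = finfo.max / amax.clamp(min=1e-12)
    qt = (t * scale).clamp(finfo.min, finfo.max).to(torch.float8_e4m3fn)
    return qt, scale.float().reciprocal()

x = torch.randn(4, 8, dtype=torch.float16)
w = torch.randn(16, 8, dtype=torch.float16)
qw, w_inv_scale = quantize_per_tensor(w)

# Dequantize the weight and fake-quantize the activation, as apply_weights does.
w_dq = qw.to(torch.float16) * w_inv_scale
qx, x_inv_scale = quantize_per_tensor(x)
x_dq = qx.to(torch.float16) * x_inv_scale
out = torch.nn.functional.linear(x_dq, w_dq)

# The result should differ from the FP16 matmul only by quantization error.
print((out - torch.nn.functional.linear(x, w)).abs().max())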
diff --git a/vllm/model_executor/models/llama.py b/vllm/model_executor/models/llama.py
index 22957a78cbc52..8b8679b9bc1d5 100644
--- a/vllm/model_executor/models/llama.py
+++ b/vllm/model_executor/models/llama.py
@@ -276,13 +276,12 @@ def forward(
         attn_metadata: AttentionMetadata,
         inputs_embeds: Optional[torch.Tensor] = None,
     ) -> torch.Tensor:
-        self.embed_tokens.weight[0] = torch.Tensor([-2.0336052269225574e-36, ...])
[the removed line above was a hardcoded debug assignment; its several thousand float literals are elided here]
9.497994354644083e-36, 5.948001415160775e-36, 7.85230226349288e-36, -5.242704804667402e-36, 3.9966807927957775e-36, -6.018531076210112e-36, 7.147005652999508e-36, -2.997510594596833e-36, -6.91190678283505e-36, -8.745677970117819e-36, 3.5970127135162e-36, -4.325819211026018e-36, -6.629788138637702e-36, -1.88079096131566e-35, 1.1707923734189984e-35, -5.524823448864751e-36, -1.9983403963978888e-36, 1.222514124855179e-35, -1.0485409609334805e-35, -3.314894069318851e-36, -1.1990042378387333e-35, 9.686073450775649e-36, 9.262895484479626e-36, -2.3980084756774665e-36, 4.4668785331246925e-36, 5.64237288394698e-36, 6.958926556867942e-36, -6.25362994637457e-36, 3.3384039563352965e-36, -1.189600283032155e-35, -8.275480229788904e-36, -1.680956921675871e-36, -1.7021158199906723e-35, 1.316553672920962e-35, -3.5970127135162e-36, 5.612985525176423e-37, -8.46355932592047e-36, -3.644032487549091e-36, -1.1143686445795286e-35, 3.361913843351742e-36, -2.4920480237432495e-36, 3.7145621485984285e-36, 3.173834747220176e-36, -8.839717518183602e-36, 4.984096047486499e-36, 1.0579449157400588e-35, 6.347669494440353e-36, -8.369519777854687e-36, -4.725487290305596e-36, 1.8619830517025034e-35, -3.949661018762886e-36, -1.3259576277275403e-35, 4.796016951354933e-36, -7.711242941394206e-36, 2.747718045047097e-37, 1.1402295202976189e-36, 1.96307556587322e-36, 2.71833068627654e-37, -1.0579449157400588e-35, -1.2930437859045163e-36, -5.0311158215193905e-36, 1.1931267660846218e-36, -1.222514124855179e-35, 7.946341811558664e-36, 4.86654661240427e-36, -9.827132772874324e-36, 9.545014128676975e-36, -5.5013135618483055e-36, 3.6205226005326455e-36, -9.827132772874324e-36, 1.0203290965137456e-35, -1.626884181538046e-35, -8.369519777854687e-36, -2.644862289350147e-36, -8.557598873986253e-36, -9.686073450775649e-36, 4.255289549976681e-36, -1.570460452698576e-35, -3.96729343402522e-37, 9.051306501331614e-37, 7.993361585591555e-36, -1.034435028723613e-35, 2.245194210070569e-36, -1.0461899722318359e-36, -1.0485409609334805e-35, -6.770847460736376e-36, -5.242704804667402e-36, 3.636685647856452e-38, -6.535748590571919e-36, -1.6574470346594254e-36, -2.7036370068912613e-37, -5.5953531099140885e-36, 1.344765537340697e-35, 2.0124463286077562e-35, -1.1190706219828177e-35, 2.915225990039273e-36, -1.3283086164291849e-36, 1.8431751420893468e-35, -1.476420904632793e-35, -9.686073450775649e-36, -4.419858759091801e-36, -1.3459410316915192e-36, -1.1472824864025526e-35, 1.2131101700486007e-35, 7.85230226349288e-36, 1.1990042378387333e-36, -7.241045201065291e-36, 6.347669494440353e-36, -5.360254239749631e-36, 8.745677970117819e-36, -7.52316384526264e-36, -6.58276836460481e-36, -1.189600283032155e-35, -2.3744985886610208e-36, -9.592033902709866e-36, -2.268704097087015e-36, -2.621352402333701e-36, 9.827132772874324e-36, 7.052966104933725e-36, 3.314894069318851e-36, 1.4294011305999016e-35, 7.570183619295532e-36, 6.817867234769268e-36, 6.817867234769268e-36, 1.8901949161222383e-35, -4.2787994369931265e-36, -1.0109251417071673e-35, -9.686073450775649e-36, -1.3259576277275403e-35, 1.0391370061269022e-35, -6.676807912670593e-36, -9.497994354644083e-36, -8.510579099953362e-36, -1.0203290965137456e-35, -1.3106762011668506e-36, 7.099985878966617e-36, 2.73890183741593e-36, 1.598672317118311e-36, 1.263656427133959e-36, -1.586917373610088e-36, -1.7115197747972506e-35, 4.319941739271907e-37, -1.692711865184094e-36, -1.9043008483321058e-36, -2.621352402333701e-36, 6.171345341817009e-37, -1.7279766957087626e-36, -4.5609180811904755e-36, 9.639053676742758e-37, 
1.2037062152420224e-35, -5.195685030634511e-36, -2.1981744360376776e-36, 4.913566386437162e-36, -2.2099293795459005e-36, -3.173834747220176e-36, 4.2787994369931265e-36, 1.3635734469538535e-35, 1.0579449157400588e-35, 1.2319180796617573e-35, -2.245194210070569e-36, 1.504632769052528e-35, 2.5978425153172554e-36, -4.796016951354933e-36, 5.524823448864751e-36, 6.488728816539027e-36, 1.0109251417071673e-35, -1.1707923734189984e-35, -1.9654265545748647e-35, 7.52316384526264e-36, -1.8713870065090817e-35, 1.1402295202976189e-36, 9.844765188136658e-38, 4.8430367253878245e-36, 9.686073450775649e-36, -1.2977457633078054e-35, 1.3353615825341186e-35, -1.2131101700486007e-35, 6.488728816539027e-36, -2.8446963289899358e-36, 2.8446963289899358e-36, -1.4294011305999016e-35, 7.382104523163966e-36, -9.497994354644083e-36, 1.1660903960157092e-35, -4.678467516272704e-36, 9.07481638834806e-36, -8.322500003821796e-36, 9.356935032545409e-36, -2.997510594596833e-36, 1.3917853113735884e-35, 1.3259576277275403e-35, 2.527312854267918e-36, 4.86654661240427e-36, 5.64237288394698e-36, -8.839717518183602e-36, 1.9395656788567744e-37, -1.1637394073140646e-36, 6.347669494440353e-36, 7.899322037525772e-36, -1.6080762719248893e-35, 4.760752120830264e-37, -4.772507064338487e-36, 8.087401133657338e-36, 1.189600283032155e-35, 6.535748590571919e-36, 1.43880508540648e-35, -9.686073450775649e-36, 1.2037062152420224e-35, 7.805282489459989e-36, 3.009265538105056e-36, 1.3867159919856673e-38, 3.738072035614874e-36, 2.339233758136352e-36, -8.839717518183602e-36, -1.1190706219828177e-35, -1.6550960459577808e-35, 9.07481638834806e-36, 6.629788138637702e-36, 8.698658196084928e-36, 6.488728816539027e-36, -5.524823448864751e-36, 3.855621470697103e-36, 8.46355932592047e-36, -2.8446963289899358e-36, 3.009265538105056e-36, -7.52316384526264e-36, 3.471381754772068e-38, -3.973170905779332e-36, -7.946341811558664e-36, -5.113400426076951e-37, 1.2871663141504048e-36, -2.915225990039273e-36, -2.7506567809241528e-36, -4.8430367253878245e-36, 8.557598873986253e-36, -3.385423730368188e-36, 1.2695338988880705e-35, -3.8086016966642115e-36, -1.1225971050352846e-36, -2.433273306202135e-36, -2.1393997184965633e-36, -1.0109251417071673e-35, -1.1190706219828177e-35, -3.3384039563352965e-36, -2.4215183626939123e-36, -4.255289549976681e-36, 7.099985878966617e-36, 3.5970127135162e-36, 8.886737292216494e-36, -6.065550850243004e-36, -4.984096047486499e-36, 8.632536638851174e-38, 2.0100953399061116e-36, 9.07481638834806e-36, -4.114230227878006e-36, -3.220854521253068e-36, 2.5860875718090325e-36, 5.319111937470851e-37, 3.032775425121502e-36, 7.617203393328423e-36, 7.617203393328423e-36, 6.347669494440353e-36, -8.510579099953362e-36, 5.830451980078546e-36, -4.9370762734536075e-37, 5.900981641127883e-36, 6.065550850243004e-36, -5.583598166405866e-37, 6.864887008802159e-36, -9.356935032545409e-36, 6.958926556867942e-36, -2.268704097087015e-36, 1.1472824864025526e-35, 3.644032487549091e-36, 1.9513206223649973e-36, -7.93458686805044e-37, 8.557598873986253e-36, -1.4199971757933233e-35, -3.832111583680657e-36, 4.396348872075355e-36, -1.692711865184094e-35, -2.8917161030228273e-36, 1.946618644961708e-35, 1.2131101700486007e-35, 5.736412432012763e-36, -7.946341811558664e-36, 8.322500003821796e-36, 5.430783900798968e-36, -1.1461069920517303e-36, -5.289724578700294e-36, 3.361913843351742e-36, 1.3259576277275403e-35, -6.300649720407461e-36, 6.817867234769268e-36, -3.502973165450417e-36, 4.1847598889273435e-36, -2.82118644197349e-36, 9.968192094972998e-36, 
-1.0485409609334805e-35, 1.570460452698576e-35, 1.3259576277275403e-35, -1.0626468931433479e-35, -1.6362881363446242e-35, -1.4952288142459497e-35, 1.892545904823883e-36, -1.0297330513203239e-35, 6.723827686703485e-36, -1.598672317118311e-35, 7.85230226349288e-36, -8.816207631167156e-38, 2.0424214345537245e-37, -1.1002627123696611e-35, -6.906029311080939e-37, 9.697828394283872e-37, 4.2787994369931265e-36, 4.020190679812223e-36, 1.0403125004777244e-36, 3.361913843351742e-36, 7.335084749131074e-36, 5.571843222897643e-36, 1.5140367238591063e-35, 1.0297330513203239e-35, 3.7145621485984285e-36, 1.0626468931433479e-35, -9.592033902709866e-36, -1.6080762719248893e-35, -8.510579099953362e-36, -2.5978425153172554e-36, 1.0297330513203239e-35, 7.335084749131074e-36, 4.114230227878006e-36, -6.629788138637702e-36, -4.9370762734536075e-36, 9.07481638834806e-36, 3.92615113174644e-36, 1.9184067805419732e-35, 5.101645482568728e-36, -3.879131357713549e-36, 4.2787994369931265e-36, 2.915225990039273e-36, 7.85230226349288e-36, -3.220854521253068e-36, -1.1660903960157092e-35, -9.968192094972998e-36, 3.5970127135162e-36, -1.7115197747972506e-35, -6.300649720407461e-36, 3.122406869371701e-39, 7.817037432968212e-37, -6.441709042506136e-36, 8.651638422052036e-36, 1.189600283032155e-35, 7.005946330900834e-36, 5.101645482568728e-36, 4.043700566828669e-36, -6.25362994637457e-36, 2.0100953399061116e-36, 2.550822741284364e-36, 5.054625708535836e-36, 5.360254239749631e-36, -1.9160557918403286e-36, -2.256949153578792e-36, 3.438320976155191e-37, 4.8430367253878245e-36, 4.3728389850589095e-36, 6.018531076210112e-36, 1.128474576789396e-36, -3.314894069318851e-36, 8.816207631167156e-37, -7.946341811558664e-36, 1.4576129950196365e-35, -2.339233758136352e-36, -5.172175143618065e-36, -8.228460455756013e-36, -1.0485409609334805e-35, -5.830451980078546e-36, 6.817867234769268e-36, 1.4576129950196365e-35, -2.277520304718182e-37, -1.1343520485435074e-36, -1.6456920911512025e-35, 2.997510594596833e-36, -1.0767528253532154e-35, 2.4215183626939123e-36, -4.1847598889273435e-36, -6.065550850243004e-36, 4.026068151566335e-37, 2.1276447749883404e-36, -9.844765188136658e-38, 5.736412432012763e-36, -5.054625708535836e-36, -4.020190679812223e-36, 3.0562853121379475e-36, -2.6566172328583698e-36, -4.913566386437162e-36, 9.827132772874324e-36, 1.5234406786656846e-35, 4.231779662960235e-36, 1.680956921675871e-36, -6.629788138637702e-36, -5.995021189193666e-36, 1.5398975995771966e-36, 2.5126191748826395e-37, 1.692711865184094e-36, 3.126814973187285e-36, -8.228460455756013e-36, -4.208269775943789e-36, 3.76158192263132e-36, -4.208269775943789e-36, -4.8430367253878245e-36, -4.86654661240427e-36, -1.410593220986745e-35, 4.1847598889273435e-36, -4.8430367253878245e-36, 2.997510594596833e-36, 7.758262715427098e-36, 7.273371295712904e-38, 2.715391950399484e-36, -4.419858759091801e-36, -5.571843222897643e-36, 4.70197740328915e-36, 1.0109251417071673e-35, -1.3259576277275403e-35, -1.4199971757933233e-35, 6.347669494440353e-36, -3.103305086170839e-36, 3.92615113174644e-36, -1.1378785315959743e-35, -1.128474576789396e-35, 1.2601299440814922e-35, -1.3929608057244107e-36, -9.8153778293661e-37, -1.095560734966372e-35, -1.2342690683634019e-36, -2.2334392665623463e-36, 3.9966807927957775e-37, 2.453844457341525e-37, 5.830451980078546e-36, 2.1158898314801175e-35, 2.0923799444636718e-36, 7.946341811558664e-36, -8.46355932592047e-36, 2.080625000955449e-36, 9.309915258512517e-36, 6.629788138637702e-36, 3.314894069318851e-36, -2.7976765549570443e-36, 
-2.245194210070569e-36, -2.621352402333701e-36, -6.770847460736376e-36, -1.570460452698576e-35, 1.1049646897729503e-35, 1.288341808501227e-35, 3.032775425121502e-36, 6.91190678283505e-36, -1.0203290965137456e-35, 5.430783900798968e-36, 1.6174802267314676e-35, -1.5798644075051544e-35, 6.159590398308787e-36, 1.0720508479499262e-35, -5.64237288394698e-36, 3.291384182302405e-36, -1.6692019781676483e-36, -9.027796614315168e-36, 4.020190679812223e-36, -1.1990042378387333e-35, -9.309915258512517e-36, 9.686073450775649e-36, -1.7021158199906723e-35, -8.595802440387977e-38, 1.1143686445795286e-35, -1.0861567801597937e-35, -4.4668785331246925e-36, 1.0579449157400588e-35, 2.8446963289899358e-36, -2.7976765549570443e-36, -4.437491174354135e-37, -5.995021189193666e-36, 1.2695338988880705e-35, 1.1002627123696611e-35, -4.055455510336892e-37, 7.52316384526264e-36, -5.3132344657167395e-36, 6.394689268473244e-36, -6.112570624275895e-36, -3.549992939483308e-36, -1.3929608057244107e-36, -6.817867234769268e-36, 7.570183619295532e-36, 8.287235173297127e-37, 1.2601299440814922e-35, 2.3274788146281293e-36, 2.8446963289899358e-36, -2.5860875718090325e-36, -1.4294011305999016e-35, -8.980776840282277e-36, 3.314894069318851e-36, 8.651638422052036e-36, -4.043700566828669e-36, 1.6174802267314676e-35, 7.429124297196857e-36, 4.796016951354933e-36, 4.1847598889273435e-36, 1.5610564978919978e-35, -1.1461069920517303e-36, 1.1801963282255767e-35, 2.5155579107596953e-36, 8.698658196084928e-36, 6.553381005834253e-37, 5.172175143618065e-36, 2.0124463286077562e-35, -5.054625708535836e-36, 1.8337711872827685e-35, 2.362743645152798e-36, -5.524823448864751e-36, 7.617203393328423e-36, 1.5610564978919978e-35, 4.231779662960235e-36, -5.47780367483186e-36, -8.745677970117819e-36, -6.876641952310382e-37, 8.963144425019942e-38, -1.1801963282255767e-35, -6.112570624275895e-36, -7.493776486492083e-37, -1.0109251417071673e-35, 8.228460455756013e-36, 2.3980084756774665e-36, -2.0718087933242817e-37, -9.827132772874324e-36, 3.1503248602037305e-36, -6.676807912670593e-36, -2.2804590405952378e-36, -3.408933617384634e-36, -2.82118644197349e-36, -1.3224311446750734e-36, 8.557598873986253e-36, 1.1472824864025526e-35, 9.262895484479626e-36, -3.314894069318851e-36, -7.147005652999508e-36, 1.0861567801597937e-35, -1.3353615825341186e-35, -6.770847460736376e-36, -5.995021189193666e-36, 8.839717518183602e-36, -1.0297330513203239e-35, 1.4199971757933233e-35, 7.617203393328423e-36, -1.6362881363446242e-35, -1.7632415262334313e-36, -1.4670169498262148e-35, 3.502973165450417e-36, -6.37705685321091e-37, 1.5140367238591063e-35, 4.349329098042464e-36, -6.018531076210112e-36, -1.3635734469538535e-35, 8.79269774415071e-36, 5.965633830423109e-37, 5.84808439534088e-37, 1.3635734469538535e-35, 7.946341811558664e-36, -1.189600283032155e-35, 6.58276836460481e-36, -6.958926556867942e-36, -7.147005652999508e-36, 1.4294011305999016e-35, 4.161250001910898e-36, -1.9395656788567744e-36, -1.1707923734189984e-35, 1.1990042378387333e-36, 6.629788138637702e-36, 2.915225990039273e-36, -6.25362994637457e-36, 4.725487290305596e-36, -2.080625000955449e-36, 3.667542374565537e-36, 4.231779662960235e-36, 9.121836162380951e-36, -1.3106762011668506e-36, 8.79269774415071e-36, -4.137740114894452e-36, 9.991701981989444e-38, -5.383764126766077e-36, -5.0311158215193905e-36, -1.0062231643038781e-35, -4.678467516272704e-36, 1.598672317118311e-36, -5.736412432012763e-36, -9.07481638834806e-36, 4.325819211026018e-36, 3.9026412447299945e-36, 2.1041348879718946e-36, -1.1472824864025526e-35, 
7.288064975098183e-36, 2.1864194925294548e-36, -1.1801963282255767e-35, 2.82118644197349e-36, -1.6339371476429796e-36, 9.8153778293661e-37, 1.0626468931433479e-35, -9.168855936413843e-36, 3.408933617384634e-36, 2.8917161030228273e-36, 7.052966104933725e-36, -1.6362881363446242e-35, 2.715391950399484e-36, 9.286405371496071e-37, -8.604618648019145e-36, 3.2443644082695135e-36, -6.723827686703485e-36, -7.899322037525772e-36, 4.607937855223367e-36, 1.476420904632793e-35, 6.629788138637702e-36, 5.5953531099140885e-36, -7.567244883418476e-38, 4.53740819417403e-36, -1.1566864412091309e-35, 1.0109251417071673e-35, -5.054625708535836e-36, 5.242704804667402e-36, 2.4685381367268038e-36, -1.8102613002663228e-36, 4.6549576292562585e-36, 8.698658196084928e-36, 2.3744985886610208e-36, -5.47780367483186e-36, -1.7279766957087626e-36, -3.785091809647766e-36, -6.488728816539027e-36, -1.1801963282255767e-35, -9.91823358506305e-38, 2.7506567809241528e-36, 7.335084749131074e-36, 9.262895484479626e-36, -1.0062231643038781e-35, -6.488728816539027e-36, 1.6550960459577808e-35, 4.1847598889273435e-36, -6.318282135669795e-37, -4.5609180811904755e-36, -8.510579099953362e-36, -1.410593220986745e-36, -3.0562853121379475e-36, 5.148665256601619e-36, -4.775445800215543e-38, -5.6893926579798715e-36, -1.410593220986745e-35, 1.3283086164291849e-36, -3.502973165450417e-36, -1.288341808501227e-35, 1.1002627123696611e-35, -4.443368646108247e-36, -1.704466808692317e-36, 1.4576129950196365e-35, -2.3862535321692436e-36, -2.1535056507064307e-35, 4.631447742239813e-36, -2.433273306202135e-36, -3.785091809647766e-36, 9.827132772874324e-36, -1.504632769052528e-36, -6.159590398308787e-36, 9.4039548065783e-36, -2.3321807920314184e-35, 3.9966807927957775e-36, 2.5860875718090325e-36, 1.0109251417071673e-35, 1.3635734469538535e-35, 1.4670169498262148e-35, 1.5610564978919978e-35, -9.545014128676975e-36, -6.206610172341678e-36, -9.309915258512517e-36, -9.07481638834806e-36, 4.173004945419121e-37, 1.7397316392169855e-36, 2.268704097087015e-36, -2.7976765549570443e-36, -1.1872492943305104e-36, -4.984096047486499e-36, -5.383764126766077e-36, 5.853961867094992e-36, -7.288064975098183e-36, -2.4920480237432495e-36, 9.356935032545409e-36, 4.490388420141138e-36, -7.099985878966617e-36, 1.5610564978919978e-35, -9.07481638834806e-36, -4.678467516272704e-36, 1.0203290965137456e-35, -9.262895484479626e-36, 1.598672317118311e-36, -3.691052261581983e-36, -7.993361585591555e-36, 7.476144071229749e-36, 9.592033902709866e-36, 7.382104523163966e-36, 3.92615113174644e-36, -4.3728389850589095e-36, -5.3132344657167395e-36, -8.639883478543813e-37, 1.0990872180188388e-36, -7.147005652999508e-36, -6.065550850243004e-36, 4.725487290305596e-36, -1.9395656788567744e-36, -6.91190678283505e-36, -4.86654661240427e-36, -8.557598873986253e-36, -4.137740114894452e-36, 5.3132344657167395e-36, -3.92615113174644e-36, 1.0520674439859473e-36, 1.0203290965137456e-35, 2.0923799444636718e-36, 1.626884181538046e-35, 9.827132772874324e-36, 6.906029311080939e-37, -9.93292726444833e-37, 2.2922139841034606e-36, 2.7624117244323756e-36, -5.289724578700294e-36, 3.267874295285959e-36, 9.521504241660529e-37, 3.173834747220176e-36, 2.0594661026406477e-35, -5.3132344657167395e-36, 2.644862289350147e-36, -2.6918820633830384e-36, -5.6893926579798715e-36, 6.817867234769268e-36, -2.8446963289899358e-36, -4.86654661240427e-36, 1.263656427133959e-36, -6.817867234769268e-36, 1.0203290965137456e-35, 1.0767528253532154e-35, 1.2107591813469561e-36, 7.229290257557068e-37, -4.584427968206921e-36, 
9.07481638834806e-36, 8.228460455756013e-36, 7.2880649750981825e-37, 4.513898307157584e-36, 7.52316384526264e-37, -2.915225990039273e-36, 1.786751413249877e-36, -3.5970127135162e-36, -6.770847460736376e-36, 1.7115197747972506e-35, -1.9983403963978888e-36, 2.433273306202135e-36, 9.73309322480854e-36, -1.1707923734189984e-35, -6.535748590571919e-36, 9.286405371496071e-37, 4.796016951354933e-36, 7.617203393328423e-36, -2.256949153578792e-36, -5.5013135618483055e-36, -7.052966104933725e-36, -2.0923799444636718e-36, -7.335084749131074e-36, -1.664500000764359e-35, 5.6893926579798715e-36, -9.697828394283872e-37, -1.96307556587322e-36, 2.4215183626939123e-36, 5.571843222897643e-36, -4.8430367253878245e-36, -1.1660903960157092e-35, 5.900981641127883e-36, -3.549992939483308e-36, -2.0100953399061116e-36, -1.5516525430854195e-35, -5.7834322060456545e-36, -1.6339371476429796e-36, -8.933757066249385e-36, 1.570460452698576e-35, -4.772507064338487e-36, 1.4199971757933233e-35, 9.168855936413843e-36, 2.7976765549570443e-36, -3.502973165450417e-36, -1.0285575569695016e-36, -5.7834322060456545e-36, -3.408933617384634e-36, 3.667542374565537e-36, 9.545014128676975e-36, 6.91190678283505e-36, 7.664223167361315e-36, -1.4752454102819708e-36, 1.4458580515114136e-36, 3.849743998942992e-37, -2.527312854267918e-36, 4.86654661240427e-36, 7.758262715427098e-36, -1.6574470346594254e-36, 5.383764126766077e-36, 1.3071497181143837e-35, -1.1660903960157092e-35, -7.617203393328423e-36, -4.984096047486499e-36, 5.054625708535836e-37, -6.876641952310382e-37, -9.356935032545409e-36, -1.275411370642182e-36, 7.85230226349288e-36, 4.2787994369931265e-36, 6.612155723375367e-37, -1.316553672920962e-35, -1.2319180796617573e-35, -3.267874295285959e-36, 2.926980933547496e-36, 1.2695338988880705e-35, 5.289724578700294e-36, -1.3541694921472752e-35, 1.2695338988880705e-35, 1.7491355940235638e-35, 4.319941739271907e-37, -2.456783193218581e-36, -1.0391370061269022e-35, -1.88079096131566e-36, -7.899322037525772e-36, 9.545014128676975e-36, 5.995021189193666e-36, 2.9828169152115546e-37, -1.986585452889666e-36, 3.5970127135162e-36, 9.592033902709866e-36, -9.215875710446734e-36, -5.172175143618065e-36, 4.513898307157584e-36, -9.592033902709866e-36, 2.5126191748826395e-37, 2.997510594596833e-36, 1.2695338988880705e-35, -1.2977457633078054e-35, -8.557598873986253e-36, 1.0638223874941702e-36, 1.4011892661801667e-35, 4.6549576292562585e-36, 9.07481638834806e-36, 4.643202685748036e-37, -5.5953531099140885e-36, -2.0218502834143345e-36, 3.855621470697103e-36, -1.288341808501227e-35, 6.441709042506136e-36, 2.5743326283008096e-36, -1.2166366531010676e-36, 7.993361585591555e-36, -1.4011892661801667e-35, 5.64237288394698e-36, -5.995021189193666e-36, -6.770847460736376e-36, -7.335084749131074e-36, -2.3980084756774665e-36, -3.009265538105056e-36, 9.4039548065783e-36, 9.686073450775649e-36, 1.4870003537901937e-36, 9.521504241660529e-37, -3.197344634236622e-36, 4.9370762734536075e-36, -1.2601299440814922e-35, -1.0626468931433479e-35, -4.231779662960235e-36, -7.899322037525772e-36, 1.2166366531010676e-36, -1.3459410316915192e-36, -7.85230226349288e-36, 9.545014128676975e-36, 7.1940254270324e-36, 1.0062231643038781e-35, -5.948001415160775e-36, -4.349329098042464e-36, 8.745677970117819e-36, -2.057115113939003e-36, 1.344765537340697e-35, 1.504632769052528e-36, 1.369450918707965e-36, 3.855621470697103e-36, 3.197344634236622e-36, 8.287235173297127e-37, 7.640713280344869e-37, 7.005946330900834e-36, -8.040381359624447e-36, 3.197344634236622e-36, -5.5953531099140885e-36, 
2.7859216114488214e-36, 5.47780367483186e-36, -4.4668785331246925e-37, 4.5609180811904755e-36, 3.103305086170839e-36, 2.256949153578792e-36, -5.64237288394698e-36, 9.73309322480854e-36, 6.629788138637702e-36, -1.814963277669612e-35, 4.5609180811904755e-36, -6.465218929522581e-38, 5.524823448864751e-36, 3.549992939483308e-36, -5.995021189193666e-36, 1.2166366531010676e-36, 5.383764126766077e-36, 2.527312854267918e-37, -2.915225990039273e-36, 1.504632769052528e-35, 1.6692019781676483e-36, -2.915225990039273e-36, -1.222514124855179e-35, -5.64237288394698e-36, 7.993361585591555e-36, 2.3039689276116835e-36, 1.96307556587322e-36, 2.456783193218581e-36, 2.926980933547496e-36, 7.52316384526264e-36, 2.915225990039273e-36, -6.488728816539027e-36, -6.535748590571919e-36, 5.571843222897643e-36, 1.2131101700486007e-35, 2.4685381367268038e-36, 3.385423730368188e-36, -1.5516525430854195e-35, -8.79269774415071e-36, -3.573502826499754e-36, -1.3729774017604318e-35, 9.309915258512517e-36, 1.4952288142459497e-35, 6.318282135669795e-38, -1.2131101700486007e-35, -1.0720508479499262e-35, 8.369519777854687e-36, 1.3635734469538535e-35, -1.0861567801597937e-35, 1.504632769052528e-35, -5.64237288394698e-36, 7.147005652999508e-36, 3.92615113174644e-36, -1.586917373610088e-36, 1.250725989274914e-35, 5.172175143618065e-36, -3.032775425121502e-36, -3.8086016966642115e-36, -3.032775425121502e-36, 5.430783900798968e-36, 9.168855936413843e-36, 2.5155579107596953e-36, 4.2787994369931265e-36, -1.4011892661801667e-35, 1.598672317118311e-36, -4.5609180811904755e-36, -1.0720508479499262e-35, -2.6918820633830384e-36, -5.195685030634511e-36, 1.7021158199906723e-35, 1.189600283032155e-35, 1.128474576789396e-35, -6.676807912670593e-36, 1.288341808501227e-35, -6.994191387392611e-37, 1.0062231643038781e-35, 8.181440681723121e-36, -6.723827686703485e-36, -2.82118644197349e-36, -6.91190678283505e-36, -1.4294011305999016e-35, -1.814963277669612e-35, 3.6440324875490913e-37, 3.126814973187285e-36, 9.8153778293661e-37, 2.3069076634887392e-37, 9.497994354644083e-36, -5.830451980078546e-36, -5.363192975626687e-38, -1.1660903960157092e-35, -6.018531076210112e-36, -7.617203393328423e-36, 2.5743326283008096e-36, 9.592033902709866e-36, 2.4685381367268038e-36, -9.262895484479626e-36, -9.262895484479626e-36, -1.4482090402130582e-35, 8.980776840282277e-36, -3.785091809647766e-36, 1.3259576277275403e-35, -1.4752454102819708e-36, 3.173834747220176e-36, -2.1276447749883404e-36, 9.874152546907215e-36, 3.314894069318851e-36, -6.347669494440353e-36, 5.853961867094992e-36, -6.535748590571919e-36, -8.485599844998388e-38, 7.241045201065291e-36, -5.830451980078546e-36, -1.1990042378387333e-35, -1.3071497181143837e-35, -9.309915258512517e-36, 9.521504241660529e-37, 1.7632415262334313e-36, -5.6893926579798715e-36, 3.079795199154393e-36, -8.369519777854687e-36, 1.0485409609334805e-35, 4.53740819417403e-36, 1.0062231643038781e-35, 6.864887008802159e-36, 7.335084749131074e-36, 8.181440681723121e-36, 6.58276836460481e-36, 3.4530146555404696e-37, -1.4294011305999016e-35, 1.1990042378387333e-35, 1.0626468931433479e-35, -7.147005652999508e-36, -6.018531076210112e-36, 1.0485409609334805e-35, -1.3812058622161878e-36, 3.4324435044010795e-36, -5.383764126766077e-36, -4.302309324009572e-36, -6.065550850243004e-36, 6.723827686703485e-36, 9.4039548065783e-36, -4.2787994369931265e-36, 1.2977457633078054e-35, -5.195685030634511e-36, 5.172175143618065e-37, 5.0311158215193905e-36, -4.678467516272704e-36, -9.827132772874324e-36, 1.6080762719248893e-35, -9.07481638834806e-36, 
2.5155579107596953e-36, 1.7491355940235638e-35, 1.095560734966372e-35, -6.817867234769268e-36, -1.3988382774785221e-36, 7.899322037525772e-36, -4.2787994369931265e-36, 5.6893926579798715e-36, 6.91190678283505e-36, -4.0907203408615605e-36, 1.1002627123696611e-35, 1.0720508479499262e-35, -1.0932097462647274e-36, 1.0720508479499262e-35, 2.256949153578792e-36, 6.58276836460481e-36, -1.2131101700486007e-35, 6.112570624275895e-36, 3.6205226005326455e-36, 1.1707923734189984e-35, 6.159590398308787e-36, 7.147005652999508e-36, -4.984096047486499e-36, 8.698658196084928e-36, -7.493776486492083e-37, 5.360254239749631e-36, 1.6739039555709374e-35, 4.9370762734536075e-36, 6.553381005834253e-37, 9.168855936413843e-37, -8.040381359624447e-36, -2.5743326283008096e-36, 6.347669494440353e-36, -2.3862535321692436e-36, -3.879131357713549e-36, 8.416539551887579e-36, 3.408933617384634e-36, -1.1472824864025526e-35, -4.678467516272704e-36, -1.0297330513203239e-35, 2.938735877055719e-38, -5.172175143618065e-36, -1.5140367238591063e-35, -7.099985878966617e-36, -9.991701981989444e-37, -6.864887008802159e-36, 1.3738590225235485e-37, -9.592033902709866e-36, 6.394689268473244e-36, -1.4670169498262148e-35, 4.913566386437162e-36, -9.686073450775649e-36, -1.1225971050352846e-36, 1.8220162437745456e-36, 9.07481638834806e-36, -1.5516525430854195e-35, -5.995021189193666e-36, -3.291384182302405e-36, 4.796016951354933e-36, 1.4576129950196365e-36, 1.8055593228630336e-35, -1.8337711872827685e-35, 1.2789378536946488e-35, 9.592033902709866e-36, 6.347669494440353e-36, -7.335084749131074e-36, -7.4350017689509685e-37, 9.309915258512517e-36, 2.5978425153172554e-36, 1.7115197747972506e-35, 6.300649720407461e-36, 3.361913843351742e-36, 6.629788138637702e-36, -2.068870057447226e-36, -4.86654661240427e-36, 4.731364762059707e-37, 1.1049646897729503e-35, -8.698658196084928e-36, -4.86654661240427e-36, -6.58276836460481e-36, 9.051306501331614e-37, -1.0861567801597937e-35, -1.0062231643038781e-35, 7.241045201065291e-36, -1.2319180796617573e-35, 3.92615113174644e-36, 4.3728389850589095e-36, -1.4576129950196365e-36, -1.4576129950196365e-35, 1.6530389308438418e-38, 2.5860875718090325e-36, 2.3744985886610208e-36, -1.0109251417071673e-35, 7.85230226349288e-36, -1.4576129950196365e-36, -7.85230226349288e-36, 1.2131101700486007e-35, 9.874152546907215e-36, 3.032775425121502e-36, -1.986585452889666e-36, 1.2460240118716248e-36, 1.6574470346594254e-36, -1.4670169498262148e-35, 3.314894069318851e-36, -3.4824020143110267e-37, -1.222514124855179e-35, -1.9043008483321058e-36, -3.314894069318851e-36, -9.73309322480854e-36, 1.369450918707965e-36, 4.86654661240427e-36, -2.0453601704307803e-36, -1.9043008483321058e-36, 1.2977457633078054e-35, 8.275480229788904e-36, 7.052966104933725e-36, 9.345180089037186e-37, 1.2131101700486007e-35, 6.906029311080939e-37, 1.6456920911512025e-35, -1.3259576277275403e-35, -1.4670169498262148e-35, -1.7279766957087626e-36, 2.350988701644575e-36, 2.4685381367268038e-36, 5.6893926579798715e-36, 3.8086016966642115e-36, 7.099985878966617e-36, 2.4685381367268038e-36, 5.853961867094992e-36, 4.8430367253878245e-36, -7.241045201065291e-36, 4.678467516272704e-36, 1.3635734469538535e-35, 5.360254239749631e-36, 1.692711865184094e-35, 7.85230226349288e-36, -2.068870057447226e-36, -1.1167196332811731e-36, 4.396348872075355e-36, -6.723827686703485e-36, -4.255289549976681e-36, 3.92615113174644e-36, 7.099985878966617e-36, 6.206610172341678e-36, -2.068870057447226e-35, -4.419858759091801e-36, -2.6889433275059827e-37, 1.2695338988880705e-35, 
-2.527312854267918e-36, 9.874152546907215e-36, 2.204051907791789e-37, 8.933757066249385e-36, -7.147005652999508e-36, -1.6456920911512025e-36, 1.1225971050352846e-36, -7.4350017689509685e-37, -3.502973165450417e-36, -3.667542374565537e-36, -4.7489971773220415e-36, -7.335084749131074e-36, -1.0579449157400588e-35, 7.099985878966617e-36, -1.786751413249877e-35, -4.913566386437162e-36, -8.228460455756013e-36, -1.2166366531010676e-36, -1.3259576277275403e-35, 3.785091809647766e-36, 4.419858759091801e-36, 1.0861567801597937e-35, 9.874152546907215e-36, 1.1990042378387333e-36, -1.1378785315959743e-35, -1.5798644075051544e-35, -1.774996469741654e-36, 6.629788138637702e-36, 3.173834747220176e-36, -1.410593220986745e-36, 3.4530146555404696e-37, -1.2695338988880705e-35, -3.738072035614874e-36, 8.651638422052036e-36, 8.087401133657338e-36, -1.0861567801597937e-35, 2.1535056507064307e-35, -1.986585452889666e-36, 3.197344634236622e-36, 2.1599708696359533e-37, -3.2473031441465692e-37, 3.173834747220176e-36, 1.96307556587322e-36, -4.3728389850589095e-36, -3.291384182302405e-36, 7.617203393328423e-36, 2.0594661026406477e-35, -1.1990042378387333e-35, -5.242704804667402e-36, 1.8337711872827685e-36, -8.087401133657338e-36, -2.256949153578792e-36, -3.92615113174644e-36, 4.161250001910898e-36, 6.112570624275895e-36, -9.309915258512517e-36, -8.181440681723121e-36, 7.617203393328423e-36, -1.3259576277275403e-35, 3.408933617384634e-36, 9.262895484479626e-36, -3.408933617384634e-36, 8.322500003821796e-36, -3.573502826499754e-36, -4.984096047486499e-36, -3.614645128778534e-37, 1.0626468931433479e-35, 1.1566864412091309e-35, -7.570183619295532e-36, 1.275411370642182e-36, 3.6205226005326455e-36, 1.476420904632793e-35, 5.5013135618483055e-36, 2.456783193218581e-36, 9.968192094972998e-36, 8.839717518183602e-36, -9.686073450775649e-36, 3.738072035614874e-36, 3.3384039563352965e-36, -1.8619830517025034e-35, -3.691052261581983e-36, 6.629788138637702e-36, 1.8220162437745456e-36, 1.2789378536946488e-35, -4.2787994369931265e-36, 3.973170905779332e-36, 1.0109251417071673e-35, 3.7145621485984285e-36, 6.958926556867942e-36, -1.692711865184094e-35, -7.85230226349288e-36, -8.228460455756013e-36, 1.7985063567581e-36, 9.4039548065783e-36, -1.0579449157400588e-36, -3.879131357713549e-36, -2.433273306202135e-36, -8.698658196084928e-36, 1.1049646897729503e-35, 1.516387712560751e-36, -6.629788138637702e-36, -6.629788138637702e-36, -4.913566386437162e-36, 8.839717518183602e-36, 1.2413220344683356e-35, -3.197344634236622e-36, -5.524823448864751e-36, -5.830451980078546e-36, 2.0923799444636718e-36, -4.9370762734536075e-36, -7.993361585591555e-36, -5.948001415160775e-36]) - if inputs_embeds is not None: hidden_states = inputs_embeds else: hidden_states = self.get_input_embeddings(input_ids) + print("embeddings: {hidden_states}") residual = None for i in range(len(self.layers)): layer = self.layers[i] From 0848b256ff3023c1dac238c95489ff938265f744 Mon Sep 17 00:00:00 2001 From: Robert Shaw Date: Wed, 24 Apr 2024 02:22:16 +0000 Subject: [PATCH 08/90] to try with torch.scaled_mm --- run_fp8.py | 5 ++-- .../layers/quantization/fp8_static.py | 28 ++----------------- vllm/model_executor/models/llama.py | 13 +++++++-- 3 files changed, 15 insertions(+), 31 deletions(-) diff --git a/run_fp8.py b/run_fp8.py index 4b3833be8337e..1d7ed0e2e575b 100644 --- a/run_fp8.py +++ b/run_fp8.py @@ -1,5 +1,6 @@ -from vllm import LLM +from vllm import LLM, SamplingParams model = LLM("FriendliAI/Mistral-7B-Instruct-v0.2-fp8", quantization="fp8_static", 
enforce_eager=True, max_model_len=1024) # model = LLM("mistralai/Mistral-7B-Instruct-v0.2", enforce_eager=True, max_model_len=1024) -print(model.generate("Hello my name is")) \ No newline at end of file +sampling_params = SamplingParams(max_tokens=2) +print(model.generate("Hello my name is"), sampling_params) \ No newline at end of file diff --git a/vllm/model_executor/layers/quantization/fp8_static.py b/vllm/model_executor/layers/quantization/fp8_static.py index 1def8c0f9fcd3..28470d335bd50 100644 --- a/vllm/model_executor/layers/quantization/fp8_static.py +++ b/vllm/model_executor/layers/quantization/fp8_static.py @@ -149,32 +149,12 @@ def apply_weights( x_dq = self._fake_quantize_static(x, in_scale) # print(f"x_dq[0,0]: {x_dq[0,0]} // weight_dq[0,0]: {weight_dq[0,0]}") - output[:, start_offset:end_offset] = torch.nn.functional.linear(x_dq, weight_dq) + # output[:, start_offset:end_offset] = torch.nn.functional.linear(x_dq, weight_dq) + output[:, start_offset:end_offset] = torch.nn.functional.linear(x, weight_dq) start_offset = end_offset assert end_offset == output.shape[1] - # print(output) - # print(output.dtype) return output - - def _quantize_dynamic(self, x: torch.Tensor): - finfo = torch.finfo(torch.float8_e4m3fn) - min_val, max_val = x.aminmax() - amax = min_val.abs().max(max_val.abs()) - scale = finfo.max / amax.clamp(min=1e-12) - - # print(finfo.max) - # print(amax) - # print(finfo.max / amax.clamp(min=1e-12)) - # assert False - # scale and clamp the tensor to bring it to - # the representative range of float8 data type - # (as default cast is unsaturated) - qweight = (x * scale).clamp(min=finfo.min, max=finfo.max) - # Return both float8 data and the inverse scale (as float), - # as both required as inputs to torch._scaled_mm - # print(scale) - return qweight, scale.float().reciprocal() def _quantize(self, x: torch.Tensor, inv_scale: torch.tensor): finfo = torch.finfo(torch.float8_e4m3fn) @@ -185,12 +165,8 @@ def _dequantize(self, xq: torch.Tensor, inv_scale: torch.tensor, dtype: torch.dt def _fake_quantize_static(self, x: torch.Tensor, inv_scale: torch.Tensor): xq = self._quantize(x, inv_scale) - # xq, inv_scale = self._dynamic_quantize(x) - # print(inv_scale) xdq = self._dequantize(xq, inv_scale, x.dtype) - # print(f"----- inv_scale: {inv_scale} // x[0,0]: {x[0,0]} // xq[0,0]: {xq[0,0]} // xdq[0,0]: {xdq[0,0]}") - return xdq diff --git a/vllm/model_executor/models/llama.py b/vllm/model_executor/models/llama.py index 8b8679b9bc1d5..cd4a310ecbcf5 100644 --- a/vllm/model_executor/models/llama.py +++ b/vllm/model_executor/models/llama.py @@ -281,7 +281,12 @@ def forward( else: hidden_states = self.get_input_embeddings(input_ids) - print("embeddings: {hidden_states}") + is_warmup = kv_caches[0] is None + + if not is_warmup: + # print(f"embeddings: {hidden_states[0]}") + pass + residual = None for i in range(len(self.layers)): layer = self.layers[i] @@ -292,9 +297,11 @@ def forward( attn_metadata, residual, ) - print(f"idx: {i}: {hidden_states}") + if not is_warmup: + pass + print(f"idx: {i}: {hidden_states}") hidden_states, _ = self.norm(hidden_states, residual) - print(hidden_states) + # print("------------") return hidden_states From 15882eaded0ba7101a87b17e7d1bd833254a90e5 Mon Sep 17 00:00:00 2001 From: Robert Shaw Date: Wed, 24 Apr 2024 12:06:04 +0000 Subject: [PATCH 09/90] stash --- run_fp8.py | 4 +- .../layers/quantization/fp8_static.py | 117 +++++++++++++----- vllm/model_executor/models/llama.py | 11 +- 3 files changed, 95 insertions(+), 37 deletions(-) diff --git 
a/run_fp8.py b/run_fp8.py index 1d7ed0e2e575b..4ffcbf3c0985d 100644 --- a/run_fp8.py +++ b/run_fp8.py @@ -1,6 +1,6 @@ from vllm import LLM, SamplingParams model = LLM("FriendliAI/Mistral-7B-Instruct-v0.2-fp8", quantization="fp8_static", enforce_eager=True, max_model_len=1024) -# model = LLM("mistralai/Mistral-7B-Instruct-v0.2", enforce_eager=True, max_model_len=1024) +# model = LLM("mistralai/Mistral-7B-Instruct-v0.2", enforce_eager=True, max_model_len=1024, quantization="fp8") sampling_params = SamplingParams(max_tokens=2) -print(model.generate("Hello my name is"), sampling_params) \ No newline at end of file +print(model.generate("What is your name"), sampling_params) \ No newline at end of file diff --git a/vllm/model_executor/layers/quantization/fp8_static.py b/vllm/model_executor/layers/quantization/fp8_static.py index 28470d335bd50..e84997a07f45b 100644 --- a/vllm/model_executor/layers/quantization/fp8_static.py +++ b/vllm/model_executor/layers/quantization/fp8_static.py @@ -22,7 +22,7 @@ def get_supported_act_dtypes(cls) -> List[torch.dtype]: @classmethod def get_min_capability(cls) -> int: - return 89 + return 90 @classmethod def get_config_filenames(cls) -> List[str]: @@ -130,49 +130,99 @@ def scales_shard_indexer( # print(f"----- loaded_weight: {loaded_weight}") return param[self.shard_id_as_int(shard_id)], loaded_weight + # def apply_weights( + # self, + # layer: torch.nn.Module, + # x: torch.Tensor, + # bias: Optional[torch.Tensor] = None + # ) -> torch.Tensor: + # logical_widths = layer.logical_widths + # q_weight = layer.weight + # w_scales = layer.weight_scale + # in_scales = layer.in_scale + + # output = torch.zeros(x.shape[0], q_weight.shape[0], dtype=x.dtype, device="cuda") + # start_offset = 0 + # for _, (logical_width, w_scale, in_scale) in enumerate(zip(logical_widths, w_scales, in_scales)): + # end_offset = start_offset + logical_width + # weight_dq = self._dequantize(q_weight[start_offset:end_offset, :], w_scale, x.dtype) + # x_dq = self._fake_quantize_static(x, in_scale) + + # # print(f"x_dq[0,0]: {x_dq[0,0]} // weight_dq[0,0]: {weight_dq[0,0]}") + # # output[:, start_offset:end_offset] = torch.nn.functional.linear(x_dq, weight_dq) + # output[:, start_offset:end_offset] = torch.nn.functional.linear(x, weight_dq) + # start_offset = end_offset + + # assert end_offset == output.shape[1] + # return output + def apply_weights( self, - layer: torch.nn.Module, - x: torch.Tensor, - bias: Optional[torch.Tensor] = None - ) -> torch.Tensor: - logical_widths = layer.logical_widths - q_weight = layer.weight - w_scales = layer.weight_scale - in_scales = layer.in_scale - - output = torch.zeros(x.shape[0], q_weight.shape[0], dtype=x.dtype, device="cuda") + layer, + x, + bias=None + ): + # print(sum(x)) + # assert False + # qinput, x_scale = per_tensor_quantize(x) + # print(qinput) + # assert False + output = torch.zeros(x.shape[0], layer.weight.shape[0], dtype=x.dtype, device="cuda") start_offset = 0 - for _, (logical_width, w_scale, in_scale) in enumerate(zip(logical_widths, w_scales, in_scales)): + print("\n----") + + for _, (logical_width, w_scale, in_scale) in enumerate(zip(layer.logical_widths, layer.weight_scale, layer.in_scale)): end_offset = start_offset + logical_width - weight_dq = self._dequantize(q_weight[start_offset:end_offset, :], w_scale, x.dtype) - x_dq = self._fake_quantize_static(x, in_scale) - - # print(f"x_dq[0,0]: {x_dq[0,0]} // weight_dq[0,0]: {weight_dq[0,0]}") - # output[:, start_offset:end_offset] = torch.nn.functional.linear(x_dq, weight_dq) - output[:, 
start_offset:end_offset] = torch.nn.functional.linear(x, weight_dq) + print(f"(start,end) = ({start_offset}, {end_offset})") + + q_weight = layer.weight[start_offset:end_offset, :].t() + q_input = self._quantize(x, inv_scale=in_scale) + x_scale = in_scale + # print(f"in_scale: {in_scale}") + # print(f"w_scale: {w_scale}") + # print(f"input: {x}") + # print(f"q_input: {q_input}") + # print(f"q_weight: {q_weight}") + # q_input, x_scale = per_tensor_quantize(x) + + assert not torch.isnan(q_input[0,0]) + + out, _ = torch._scaled_mm( + q_input, + q_weight, + out_dtype=x.dtype, + scale_a=x_scale.float(), + scale_b=w_scale.float(), + bias=bias, + ) + print(f"out.norm(): {out.norm()}") + output[:, start_offset:end_offset] = out start_offset = end_offset assert end_offset == output.shape[1] + # print(output.sum(dim=0).shape) + # print(output.sum(dim=1).shape) + + # print(output.norm(), output.norm(dim=0), output.norm(dim=1)) return output - - def _quantize(self, x: torch.Tensor, inv_scale: torch.tensor): + + def _quantize(self, tensor: torch.Tensor, inv_scale: torch.tensor): finfo = torch.finfo(torch.float8_e4m3fn) - return (x / inv_scale).clamp(min=finfo.min, max=finfo.max) + qtensor = (tensor / inv_scale).clamp(min=finfo.min, max=finfo.max) + return qtensor.to(torch.float8_e4m3fn) - def _dequantize(self, xq: torch.Tensor, inv_scale: torch.tensor, dtype: torch.dtype): - return (xq.to(dtype) * inv_scale) + # def _dequantize(self, xq: torch.Tensor, inv_scale: torch.tensor, dtype: torch.dtype): + # return (xq.to(dtype) * inv_scale) - def _fake_quantize_static(self, x: torch.Tensor, inv_scale: torch.Tensor): - xq = self._quantize(x, inv_scale) - xdq = self._dequantize(xq, inv_scale, x.dtype) - # print(f"----- inv_scale: {inv_scale} // x[0,0]: {x[0,0]} // xq[0,0]: {xq[0,0]} // xdq[0,0]: {xdq[0,0]}") - return xdq + # def _fake_quantize_static(self, x: torch.Tensor, inv_scale: torch.Tensor): + # xq = self._quantize(x, inv_scale) + # xdq = self._dequantize(xq, inv_scale, x.dtype) + # # print(f"----- inv_scale: {inv_scale} // x[0,0]: {x[0,0]} // xq[0,0]: {xq[0,0]} // xdq[0,0]: {xdq[0,0]}") + # return xdq -def per_tensor_quantize(tensor: torch.Tensor) -> Tuple[torch.Tensor, float]: +def per_tensor_quantize(tensor: torch.Tensor) -> tuple[torch.Tensor, float]: """Quantize a tensor using per-tensor static scaling factor. - Args: tensor: The input tensor. """ @@ -181,14 +231,19 @@ def per_tensor_quantize(tensor: torch.Tensor) -> Tuple[torch.Tensor, float]: # Since .abs() creates a new tensor, we use aminmax to get # the min and max first and then calculate the absmax. 
min_val, max_val = tensor.aminmax() + print(min_val) + print(max_val) amax = min_val.abs().max(max_val.abs()) + print(amax) scale = finfo.max / amax.clamp(min=1e-12) + print(scale) # scale and clamp the tensor to bring it to # the representative range of float8 data type # (as default cast is unsaturated) qweight = (tensor * scale).clamp(min=finfo.min, max=finfo.max) + print(qweight) # Return both float8 data and the inverse scale (as float), # as both required as inputs to torch._scaled_mm qweight = qweight.to(torch.float8_e4m3fn) scale = scale.float().reciprocal() - return qweight, scale + return qweight, scale \ No newline at end of file diff --git a/vllm/model_executor/models/llama.py b/vllm/model_executor/models/llama.py index cd4a310ecbcf5..c9c2097ce6267 100644 --- a/vllm/model_executor/models/llama.py +++ b/vllm/model_executor/models/llama.py @@ -283,8 +283,9 @@ def forward( is_warmup = kv_caches[0] is None - if not is_warmup: - # print(f"embeddings: {hidden_states[0]}") + if not is_warmup: + print("NOT IN WARMUP") + print(f"embeddings: {hidden_states[0]}") pass residual = None @@ -298,10 +299,12 @@ def forward( residual, ) if not is_warmup: + assert False + # print(f"idx: {i}: {hidden_states}") pass - print(f"idx: {i}: {hidden_states}") hidden_states, _ = self.norm(hidden_states, residual) - # print("------------") + if not is_warmup: + assert False return hidden_states From 7e6b675cfdc5272735d369e2c29155441a59e31e Mon Sep 17 00:00:00 2001 From: Robert Shaw Date: Wed, 24 Apr 2024 12:58:45 +0000 Subject: [PATCH 10/90] added way to do weight quantization --- quantize.ipynb | 250 ++++++++++++++++++ run_fp8.py | 2 +- .../model_executor/layers/quantization/fp8.py | 5 + .../layers/quantization/fp8_static.py | 143 +++++----- 4 files changed, 340 insertions(+), 60 deletions(-) create mode 100644 quantize.ipynb diff --git a/quantize.ipynb b/quantize.ipynb new file mode 100644 index 0000000000000..17efdc17fd39b --- /dev/null +++ b/quantize.ipynb @@ -0,0 +1,250 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/paperspace/env/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n", + "Loading checkpoint shards: 100%|██████████| 3/3 [00:01<00:00, 1.94it/s]\n" + ] + } + ], + "source": [ + "import torch\n", + "from transformers import AutoModelForCausalLM\n", + "\n", + "model = AutoModelForCausalLM.from_pretrained(\n", + " \"mistralai/Mistral-7B-Instruct-v0.2\",\n", + " torch_dtype=torch.bfloat16,\n", + ")\n", + "model = model.to(\"cuda\")" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from typing import Tuple\n", + "\n", + "def per_tensor_quantize(tensor: torch.Tensor) -> Tuple[torch.Tensor, float]:\n", + " \"\"\"Quantize a tensor using per-tensor static scaling factor.\n", + "\n", + " Args:\n", + " tensor: The input tensor.\n", + " \"\"\"\n", + " finfo = torch.finfo(torch.float8_e4m3fn)\n", + " # Calculate the scale as dtype max divided by absmax.\n", + " # Since .abs() creates a new tensor, we use aminmax to get\n", + " # the min and max first and then calculate the absmax.\n", + " min_val, max_val = tensor.aminmax()\n", + " amax = min_val.abs().max(max_val.abs())\n", + " scale = finfo.max / amax.clamp(min=1e-12)\n", + " # scale and clamp the tensor to bring it to\n", + " # the representative range of float8 data type\n", + " # (as default cast is unsaturated)\n", + " qweight = (tensor * scale).clamp(min=finfo.min, max=finfo.max)\n", + " # Return both float8 data and the inverse scale (as float),\n", + " # as both required as inputs to torch._scaled_mm\n", + " qweight = qweight.to(torch.float8_e4m3fn)\n", + " scale = scale.float().reciprocal()\n", + " return qweight, scale" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "class LinearFP8(torch.nn.Module):\n", + " def __init__(self, qweight, scale):\n", + " super().__init__()\n", + " self.weight = torch.nn.Parameter(qweight, requires_grad=False)\n", + " self.weight_scale = torch.nn.Parameter(scale, requires_grad=False)\n", + " \n", + " def forward(self, x):\n", + " shape = x.shape\n", + " x = x.reshape(-1, shape[-1])\n", + " qinput, x_scale = per_tensor_quantize(x)\n", + " \n", + " output, _ = torch._scaled_mm(\n", + " qinput,\n", + " self.weight.t(),\n", + " out_dtype=x.dtype,\n", + " scale_a=x_scale,\n", + " scale_b=self.weight_scale,\n", + " bias=None,\n", + " )\n", + " return output.reshape(shape[0], shape[1], -1)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "SELF_ATTN_WEIGHTS = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\"]\n", + "MLP_WEIGHTS = [\"gate_proj\", \"up_proj\", \"down_proj\"]\n", + "\n", + "def quantize_proj(module, proj_name):\n", + " proj = getattr(module, proj_name)\n", + " quant_weight, quant_scale = per_tensor_quantize(proj.weight)\n", + " quant_proj = LinearFP8(quant_weight, quant_scale)\n", + " \n", + " del proj\n", + " setattr(module, proj_name, quant_proj)\n", + "\n", + "for layer in model.model.layers:\n", + " for proj_name in SELF_ATTN_WEIGHTS:\n", + " quantize_proj(layer.self_attn, proj_name)\n", + " for proj_name in MLP_WEIGHTS:\n", + " quantize_proj(layer.mlp, proj_name)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MistralForCausalLM(\n", + " (model): MistralModel(\n", + " (embed_tokens): Embedding(32000, 4096)\n", + " (layers): ModuleList(\n", + " (0-31): 32 x MistralDecoderLayer(\n", 
+ " (self_attn): MistralSdpaAttention(\n", + " (q_proj): LinearFP8()\n", + " (k_proj): LinearFP8()\n", + " (v_proj): LinearFP8()\n", + " (o_proj): LinearFP8()\n", + " (rotary_emb): MistralRotaryEmbedding()\n", + " )\n", + " (mlp): MistralMLP(\n", + " (gate_proj): LinearFP8()\n", + " (up_proj): LinearFP8()\n", + " (down_proj): LinearFP8()\n", + " (act_fn): SiLU()\n", + " )\n", + " (input_layernorm): MistralRMSNorm()\n", + " (post_attention_layernorm): MistralRMSNorm()\n", + " )\n", + " )\n", + " (norm): MistralRMSNorm()\n", + " )\n", + " (lm_head): Linear(in_features=4096, out_features=32000, bias=False)\n", + ")" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "from transformers import AutoTokenizer\n", + "tokenizer = AutoTokenizer.from_pretrained(\n", + " \"mistralai/Mistral-7B-Instruct-v0.2\"\n", + ")\n", + "tokenizer.pad_token_id = tokenizer.eos_token_id" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "input_ids = tokenizer.apply_chat_template(\n", + " [{\"role\": \"user\", \"content\": \"What is your name?\" }],\n", + " return_tensors=\"pt\"\n", + ").to(\"cuda\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n", + "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " [INST] What is your name? [/INST] I don't have a name. 
I'm just a computer program designed to assist with information\n" + ] + } + ], + "source": [ + "output = model.generate(input_ids=input_ids, max_new_tokens=20)\n", + "print(tokenizer.decode(output[0]))" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# hacked transformers/modeling_utils/dtype_byte_size to make this work\n", + "model.save_pretrained(\"mistral-fp8-static\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mod" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "env", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/run_fp8.py b/run_fp8.py index 4ffcbf3c0985d..afcb0b5e58415 100644 --- a/run_fp8.py +++ b/run_fp8.py @@ -1,6 +1,6 @@ from vllm import LLM, SamplingParams -model = LLM("FriendliAI/Mistral-7B-Instruct-v0.2-fp8", quantization="fp8_static", enforce_eager=True, max_model_len=1024) +model = LLM("nm-testing/mistral-fp8-test", tokenizer="mistralai/Mistral-7B-Instruct-v0.2", quantization="fp8_static", enforce_eager=True, max_model_len=1024) # model = LLM("mistralai/Mistral-7B-Instruct-v0.2", enforce_eager=True, max_model_len=1024, quantization="fp8") sampling_params = SamplingParams(max_tokens=2) print(model.generate("What is your name"), sampling_params) \ No newline at end of file diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py index 01e494c870e71..6ec3802c6318a 100644 --- a/vllm/model_executor/layers/quantization/fp8.py +++ b/vllm/model_executor/layers/quantization/fp8.py @@ -104,6 +104,10 @@ def apply_weights(self, x: torch.Tensor, bias: Optional[torch.Tensor] = None) -> torch.Tensor: qinput, x_scale = per_tensor_quantize(x) + print(f"w_scale: {layer.weight_scaling_factor.item()}") + print(f"x_scale: {x_scale.item()}") + print(f"q_input: {qinput}") + print(f"weight: {layer.weight}") output, _ = torch._scaled_mm( qinput, layer.weight, @@ -112,6 +116,7 @@ def apply_weights(self, scale_b=layer.weight_scaling_factor, bias=bias, ) + return output diff --git a/vllm/model_executor/layers/quantization/fp8_static.py b/vllm/model_executor/layers/quantization/fp8_static.py index e84997a07f45b..9a7c374fde1e4 100644 --- a/vllm/model_executor/layers/quantization/fp8_static.py +++ b/vllm/model_executor/layers/quantization/fp8_static.py @@ -81,17 +81,17 @@ def create_weights( "shard_indexer": self.scales_shard_indexer, }) - in_scale = Parameter( - torch.empty( - len(output_partition_sizes), - device='cuda', dtype=torch.float32, - ), requires_grad=False - ) - layer.register_parameter("in_scale", in_scale) - set_weight_attrs(in_scale, extra_weight_attrs) - set_weight_attrs(in_scale, { - "shard_indexer": self.scales_shard_indexer, - }) + # in_scale = Parameter( + # torch.empty( + # len(output_partition_sizes), + # device='cuda', dtype=torch.float32, + # ), requires_grad=False + # ) + # layer.register_parameter("in_scale", in_scale) + # set_weight_attrs(in_scale, extra_weight_attrs) + # set_weight_attrs(in_scale, { + # "shard_indexer": self.scales_shard_indexer, + # }) layer.logical_widths = output_partition_sizes @@ -130,6 +130,36 @@ def scales_shard_indexer( # print(f"----- 
loaded_weight: {loaded_weight}") return param[self.shard_id_as_int(shard_id)], loaded_weight + def apply_weights(self, + layer: torch.nn.Module, + x: torch.Tensor, + bias: Optional[torch.Tensor] = None) -> torch.Tensor: + logical_widths = layer.logical_widths + q_weight = layer.weight + w_scales = layer.weight_scale + + qinput, x_scale = per_tensor_quantize(x) + + output = torch.zeros(x.shape[0], q_weight.shape[0], dtype=x.dtype, device="cuda") + start_offset = 0 + for _, (logical_width, w_scale) in enumerate(zip(logical_widths, w_scales)): + end_offset = start_offset + logical_width + q_weight = layer.weight[start_offset:end_offset, :].t() + + out, _ = torch._scaled_mm( + qinput, + q_weight, + out_dtype=x.dtype, + scale_a=x_scale, + scale_b=w_scale, + bias=bias, + ) + output[:, start_offset:end_offset] = out + start_offset = end_offset + + assert end_offset == output.shape[1] + return output + # def apply_weights( # self, # layer: torch.nn.Module, @@ -156,55 +186,55 @@ def scales_shard_indexer( # assert end_offset == output.shape[1] # return output - def apply_weights( - self, - layer, - x, - bias=None - ): - # print(sum(x)) - # assert False - # qinput, x_scale = per_tensor_quantize(x) - # print(qinput) - # assert False - output = torch.zeros(x.shape[0], layer.weight.shape[0], dtype=x.dtype, device="cuda") - start_offset = 0 - print("\n----") + # def apply_weights( + # self, + # layer, + # x, + # bias=None + # ): + # # print(sum(x)) + # # assert False + # # qinput, x_scale = per_tensor_quantize(x) + # # print(qinput) + # # assert False + # output = torch.zeros(x.shape[0], layer.weight.shape[0], dtype=x.dtype, device="cuda") + # start_offset = 0 + # print("\n----") - for _, (logical_width, w_scale, in_scale) in enumerate(zip(layer.logical_widths, layer.weight_scale, layer.in_scale)): - end_offset = start_offset + logical_width - print(f"(start,end) = ({start_offset}, {end_offset})") + # for _, (logical_width, w_scale, in_scale) in enumerate(zip(layer.logical_widths, layer.weight_scale, layer.in_scale)): + # end_offset = start_offset + logical_width + # print(f"(start,end) = ({start_offset}, {end_offset})") - q_weight = layer.weight[start_offset:end_offset, :].t() - q_input = self._quantize(x, inv_scale=in_scale) - x_scale = in_scale - # print(f"in_scale: {in_scale}") - # print(f"w_scale: {w_scale}") - # print(f"input: {x}") - # print(f"q_input: {q_input}") - # print(f"q_weight: {q_weight}") - # q_input, x_scale = per_tensor_quantize(x) + # q_weight = layer.weight[start_offset:end_offset, :].t() + # q_input = self._quantize(x, inv_scale=in_scale) + # x_scale = in_scale + # # print(f"in_scale: {in_scale}") + # # print(f"w_scale: {w_scale}") + # # print(f"input: {x}") + # # print(f"q_input: {q_input}") + # # print(f"q_weight: {q_weight}") + # # q_input, x_scale = per_tensor_quantize(x) - assert not torch.isnan(q_input[0,0]) + # assert not torch.isnan(q_input[0,0]) - out, _ = torch._scaled_mm( - q_input, - q_weight, - out_dtype=x.dtype, - scale_a=x_scale.float(), - scale_b=w_scale.float(), - bias=bias, - ) - print(f"out.norm(): {out.norm()}") - output[:, start_offset:end_offset] = out - start_offset = end_offset + # out, _ = torch._scaled_mm( + # q_input, + # q_weight * w_scale, + # out_dtype=x.dtype, + # scale_a=x_scale.float(), + # scale_b=w_scale.float(), + # bias=bias, + # ) + # print(f"out.norm(): {out.norm()}") + # output[:, start_offset:end_offset] = out + # start_offset = end_offset - assert end_offset == output.shape[1] - # print(output.sum(dim=0).shape) - # 
print(output.sum(dim=1).shape) + # assert end_offset == output.shape[1] + # # print(output.sum(dim=0).shape) + # # print(output.sum(dim=1).shape) - # print(output.norm(), output.norm(dim=0), output.norm(dim=1)) - return output + # # print(output.norm(), output.norm(dim=0), output.norm(dim=1)) + # return output def _quantize(self, tensor: torch.Tensor, inv_scale: torch.tensor): finfo = torch.finfo(torch.float8_e4m3fn) @@ -231,17 +261,12 @@ def per_tensor_quantize(tensor: torch.Tensor) -> tuple[torch.Tensor, float]: # Since .abs() creates a new tensor, we use aminmax to get # the min and max first and then calculate the absmax. min_val, max_val = tensor.aminmax() - print(min_val) - print(max_val) amax = min_val.abs().max(max_val.abs()) - print(amax) scale = finfo.max / amax.clamp(min=1e-12) - print(scale) # scale and clamp the tensor to bring it to # the representative range of float8 data type # (as default cast is unsaturated) qweight = (tensor * scale).clamp(min=finfo.min, max=finfo.max) - print(qweight) # Return both float8 data and the inverse scale (as float), # as both required as inputs to torch._scaled_mm qweight = qweight.to(torch.float8_e4m3fn) From cc959ea08db3ea7ba99f894e59ee0ca16168dba9 Mon Sep 17 00:00:00 2001 From: Robert Shaw Date: Wed, 24 Apr 2024 13:05:45 +0000 Subject: [PATCH 11/90] working! --- run_fp8.py | 14 ++- .../model_executor/layers/quantization/fp8.py | 4 - .../layers/quantization/fp8_static.py | 92 +------------------ vllm/model_executor/models/llama.py | 8 +- 4 files changed, 15 insertions(+), 103 deletions(-) diff --git a/run_fp8.py b/run_fp8.py index afcb0b5e58415..b9fac76e0c1ca 100644 --- a/run_fp8.py +++ b/run_fp8.py @@ -1,6 +1,12 @@ -from vllm import LLM, SamplingParams +from vllm import LLM +from transformers import AutoTokenizer model = LLM("nm-testing/mistral-fp8-test", tokenizer="mistralai/Mistral-7B-Instruct-v0.2", quantization="fp8_static", enforce_eager=True, max_model_len=1024) -# model = LLM("mistralai/Mistral-7B-Instruct-v0.2", enforce_eager=True, max_model_len=1024, quantization="fp8") -sampling_params = SamplingParams(max_tokens=2) -print(model.generate("What is your name"), sampling_params) \ No newline at end of file +tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2") + +prompt = tokenizer.apply_chat_template([{"role": "user", "content": "What is your name"}], tokenize=False, add_generation_prompt=True) +print(f"----- Prompt: {prompt}") + +outputs = model.generate(prompt) +generation = outputs[0].outputs[0].text +print(f"----- Generation: {generation}") \ No newline at end of file diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py index 6ec3802c6318a..983a63e124ae8 100644 --- a/vllm/model_executor/layers/quantization/fp8.py +++ b/vllm/model_executor/layers/quantization/fp8.py @@ -104,10 +104,6 @@ def apply_weights(self, x: torch.Tensor, bias: Optional[torch.Tensor] = None) -> torch.Tensor: qinput, x_scale = per_tensor_quantize(x) - print(f"w_scale: {layer.weight_scaling_factor.item()}") - print(f"x_scale: {x_scale.item()}") - print(f"q_input: {qinput}") - print(f"weight: {layer.weight}") output, _ = torch._scaled_mm( qinput, layer.weight, diff --git a/vllm/model_executor/layers/quantization/fp8_static.py b/vllm/model_executor/layers/quantization/fp8_static.py index 9a7c374fde1e4..24dc498fd2c18 100644 --- a/vllm/model_executor/layers/quantization/fp8_static.py +++ b/vllm/model_executor/layers/quantization/fp8_static.py @@ -141,6 +141,8 @@ def 
apply_weights(self, qinput, x_scale = per_tensor_quantize(x) output = torch.zeros(x.shape[0], q_weight.shape[0], dtype=x.dtype, device="cuda") + + # FOR LOOP TO BE REPLACED BY CUTLASS KERNEL start_offset = 0 for _, (logical_width, w_scale) in enumerate(zip(logical_widths, w_scales)): end_offset = start_offset + logical_width @@ -159,96 +161,6 @@ def apply_weights(self, assert end_offset == output.shape[1] return output - - # def apply_weights( - # self, - # layer: torch.nn.Module, - # x: torch.Tensor, - # bias: Optional[torch.Tensor] = None - # ) -> torch.Tensor: - # logical_widths = layer.logical_widths - # q_weight = layer.weight - # w_scales = layer.weight_scale - # in_scales = layer.in_scale - - # output = torch.zeros(x.shape[0], q_weight.shape[0], dtype=x.dtype, device="cuda") - # start_offset = 0 - # for _, (logical_width, w_scale, in_scale) in enumerate(zip(logical_widths, w_scales, in_scales)): - # end_offset = start_offset + logical_width - # weight_dq = self._dequantize(q_weight[start_offset:end_offset, :], w_scale, x.dtype) - # x_dq = self._fake_quantize_static(x, in_scale) - - # # print(f"x_dq[0,0]: {x_dq[0,0]} // weight_dq[0,0]: {weight_dq[0,0]}") - # # output[:, start_offset:end_offset] = torch.nn.functional.linear(x_dq, weight_dq) - # output[:, start_offset:end_offset] = torch.nn.functional.linear(x, weight_dq) - # start_offset = end_offset - - # assert end_offset == output.shape[1] - # return output - - # def apply_weights( - # self, - # layer, - # x, - # bias=None - # ): - # # print(sum(x)) - # # assert False - # # qinput, x_scale = per_tensor_quantize(x) - # # print(qinput) - # # assert False - # output = torch.zeros(x.shape[0], layer.weight.shape[0], dtype=x.dtype, device="cuda") - # start_offset = 0 - # print("\n----") - - # for _, (logical_width, w_scale, in_scale) in enumerate(zip(layer.logical_widths, layer.weight_scale, layer.in_scale)): - # end_offset = start_offset + logical_width - # print(f"(start,end) = ({start_offset}, {end_offset})") - - # q_weight = layer.weight[start_offset:end_offset, :].t() - # q_input = self._quantize(x, inv_scale=in_scale) - # x_scale = in_scale - # # print(f"in_scale: {in_scale}") - # # print(f"w_scale: {w_scale}") - # # print(f"input: {x}") - # # print(f"q_input: {q_input}") - # # print(f"q_weight: {q_weight}") - # # q_input, x_scale = per_tensor_quantize(x) - - # assert not torch.isnan(q_input[0,0]) - - # out, _ = torch._scaled_mm( - # q_input, - # q_weight * w_scale, - # out_dtype=x.dtype, - # scale_a=x_scale.float(), - # scale_b=w_scale.float(), - # bias=bias, - # ) - # print(f"out.norm(): {out.norm()}") - # output[:, start_offset:end_offset] = out - # start_offset = end_offset - - # assert end_offset == output.shape[1] - # # print(output.sum(dim=0).shape) - # # print(output.sum(dim=1).shape) - - # # print(output.norm(), output.norm(dim=0), output.norm(dim=1)) - # return output - - def _quantize(self, tensor: torch.Tensor, inv_scale: torch.tensor): - finfo = torch.finfo(torch.float8_e4m3fn) - qtensor = (tensor / inv_scale).clamp(min=finfo.min, max=finfo.max) - return qtensor.to(torch.float8_e4m3fn) - - # def _dequantize(self, xq: torch.Tensor, inv_scale: torch.tensor, dtype: torch.dtype): - # return (xq.to(dtype) * inv_scale) - - # def _fake_quantize_static(self, x: torch.Tensor, inv_scale: torch.Tensor): - # xq = self._quantize(x, inv_scale) - # xdq = self._dequantize(xq, inv_scale, x.dtype) - # # print(f"----- inv_scale: {inv_scale} // x[0,0]: {x[0,0]} // xq[0,0]: {xq[0,0]} // xdq[0,0]: {xdq[0,0]}") - # return xdq def 
per_tensor_quantize(tensor: torch.Tensor) -> tuple[torch.Tensor, float]: diff --git a/vllm/model_executor/models/llama.py b/vllm/model_executor/models/llama.py index c9c2097ce6267..58a895ac7de33 100644 --- a/vllm/model_executor/models/llama.py +++ b/vllm/model_executor/models/llama.py @@ -284,8 +284,8 @@ def forward( is_warmup = kv_caches[0] is None if not is_warmup: - print("NOT IN WARMUP") - print(f"embeddings: {hidden_states[0]}") + # print("NOT IN WARMUP") + # print(f"embeddings: {hidden_states[0]}") pass residual = None @@ -299,12 +299,10 @@ def forward( residual, ) if not is_warmup: - assert False + # assert False # print(f"idx: {i}: {hidden_states}") pass hidden_states, _ = self.norm(hidden_states, residual) - if not is_warmup: - assert False return hidden_states From 8d68dbc6c2848ee2ca6091d75be1965ecafb758d Mon Sep 17 00:00:00 2001 From: Robert Shaw Date: Wed, 24 Apr 2024 13:08:06 +0000 Subject: [PATCH 12/90] fixed llama --- vllm/model_executor/models/llama.py | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/vllm/model_executor/models/llama.py b/vllm/model_executor/models/llama.py index 58a895ac7de33..5021b9f66eadd 100644 --- a/vllm/model_executor/models/llama.py +++ b/vllm/model_executor/models/llama.py @@ -280,13 +280,6 @@ def forward( hidden_states = inputs_embeds else: hidden_states = self.get_input_embeddings(input_ids) - - is_warmup = kv_caches[0] is None - - if not is_warmup: - # print("NOT IN WARMUP") - # print(f"embeddings: {hidden_states[0]}") - pass residual = None for i in range(len(self.layers)): @@ -298,10 +291,7 @@ def forward( attn_metadata, residual, ) - if not is_warmup: - # assert False - # print(f"idx: {i}: {hidden_states}") - pass + hidden_states, _ = self.norm(hidden_states, residual) return hidden_states From 881fc65c8676e9bc76ae628de217a3edaa7494d7 Mon Sep 17 00:00:00 2001 From: Robert Shaw Date: Wed, 24 Apr 2024 13:08:36 +0000 Subject: [PATCH 13/90] fixed llama again --- vllm/model_executor/models/llama.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/vllm/model_executor/models/llama.py b/vllm/model_executor/models/llama.py index 5021b9f66eadd..016e3b039d1e8 100644 --- a/vllm/model_executor/models/llama.py +++ b/vllm/model_executor/models/llama.py @@ -280,7 +280,6 @@ def forward( hidden_states = inputs_embeds else: hidden_states = self.get_input_embeddings(input_ids) - residual = None for i in range(len(self.layers)): layer = self.layers[i] @@ -291,7 +290,6 @@ def forward( attn_metadata, residual, ) - hidden_states, _ = self.norm(hidden_states, residual) return hidden_states From e6dd46f18a1184277742572268156e9721e71f85 Mon Sep 17 00:00:00 2001 From: Robert Shaw Date: Wed, 24 Apr 2024 13:16:07 +0000 Subject: [PATCH 14/90] updated names --- run_fp8.py | 2 +- .../layers/quantization/__init__.py | 4 +- .../layers/quantization/fp8_static.py | 186 ------------------ 3 files changed, 3 insertions(+), 189 deletions(-) delete mode 100644 vllm/model_executor/layers/quantization/fp8_static.py diff --git a/run_fp8.py b/run_fp8.py index b9fac76e0c1ca..84bef6578301e 100644 --- a/run_fp8.py +++ b/run_fp8.py @@ -1,7 +1,7 @@ from vllm import LLM from transformers import AutoTokenizer -model = LLM("nm-testing/mistral-fp8-test", tokenizer="mistralai/Mistral-7B-Instruct-v0.2", quantization="fp8_static", enforce_eager=True, max_model_len=1024) +model = LLM("nm-testing/mistral-fp8-test", tokenizer="mistralai/Mistral-7B-Instruct-v0.2", quantization="fp8_serialized", enforce_eager=True, max_model_len=1024) tokenizer = 
AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2") prompt = tokenizer.apply_chat_template([{"role": "user", "content": "What is your name"}], tokenize=False, add_generation_prompt=True) diff --git a/vllm/model_executor/layers/quantization/__init__.py b/vllm/model_executor/layers/quantization/__init__.py index c139331307872..dbbe466dd7a94 100644 --- a/vllm/model_executor/layers/quantization/__init__.py +++ b/vllm/model_executor/layers/quantization/__init__.py @@ -5,7 +5,7 @@ from vllm.model_executor.layers.quantization.base_config import ( QuantizationConfig) from vllm.model_executor.layers.quantization.fp8 import FP8Config -from vllm.model_executor.layers.quantization.fp8_static import FP8StaticConfig +from vllm.model_executor.layers.quantization.fp8_serialized import FP8SerializedConfig from vllm.model_executor.layers.quantization.gptq import GPTQConfig from vllm.model_executor.layers.quantization.marlin import MarlinConfig from vllm.model_executor.layers.quantization.squeezellm import SqueezeLLMConfig @@ -14,7 +14,7 @@ "aqlm": AQLMConfig, "awq": AWQConfig, "fp8": FP8Config, - "fp8_static": FP8StaticConfig, + "fp8_serialized": FP8SerializedConfig, "gptq": GPTQConfig, "squeezellm": SqueezeLLMConfig, "marlin": MarlinConfig, diff --git a/vllm/model_executor/layers/quantization/fp8_static.py b/vllm/model_executor/layers/quantization/fp8_static.py deleted file mode 100644 index 24dc498fd2c18..0000000000000 --- a/vllm/model_executor/layers/quantization/fp8_static.py +++ /dev/null @@ -1,186 +0,0 @@ -from typing import Any, Dict, List, Optional, Tuple, Union - -import torch -from torch.nn.parameter import Parameter - -from vllm.model_executor.layers.linear import (LinearMethodBase, - set_weight_attrs) -from vllm.model_executor.layers.quantization.base_config import ( - QuantizationConfig) - - -class FP8StaticConfig(QuantizationConfig): - """Config class for FP8.""" - - @classmethod - def get_name(cls) -> str: - return "fp8_static" - - @classmethod - def get_supported_act_dtypes(cls) -> List[torch.dtype]: - return [torch.bfloat16, torch.half] - - @classmethod - def get_min_capability(cls) -> int: - return 90 - - @classmethod - def get_config_filenames(cls) -> List[str]: - return [] - - @classmethod - def from_config(cls, config: Dict[str, Any]) -> "FP8StaticConfig": - return cls() - - def get_linear_method(self) -> "Fp8LinearMethod": - return Fp8LinearMethod(self) - - def get_scaled_act_names(self) -> List[str]: - return [] - - -class Fp8LinearMethod(LinearMethodBase): - """Linear method for StaticFP8 - . - Args: - quant_config: The quantization config. 
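A note on the rename above: the QUANTIZATION_METHODS dict is the only place the user-facing string is resolved, so the key registered here has to match the string passed as quantization= in run_fp8.py exactly. A minimal sketch of the lookup, using only names visible in these diffs (illustrative, not the loader's exact code path):

    from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS

    quant_cls = QUANTIZATION_METHODS["fp8_serialized"]  # -> FP8SerializedConfig
    assert quant_cls.get_min_capability() == 90         # config demands compute capability 9.0
    config = quant_cls.from_config({})                  # this from_config ignores the dict contents
    linear_method = config.get_linear_method()          # -> Fp8SerializedLinearMethod
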
- """ - - def __init__(self, quant_config: FP8StaticConfig): - self.quant_config = quant_config - - def create_weights( - self, - layer: torch.nn.Module, - input_size_per_partition: int, - output_partition_sizes: List[int], - input_size: int, - output_size: int, - params_dtype: torch.dtype, - **extra_weight_attrs, - ): - del input_size, output_size - - weight = Parameter(torch.empty(sum(output_partition_sizes), - input_size_per_partition, - dtype=torch.float8_e4m3fn), - requires_grad=False) - layer.register_parameter("weight", weight) - set_weight_attrs(weight, {"input_dim": 1, "output_dim": 0}) - set_weight_attrs(weight, extra_weight_attrs) - - weight_scale = Parameter( - torch.empty( - len(output_partition_sizes), - device='cuda', dtype=torch.float32, - ), requires_grad=False - ) - layer.register_parameter("weight_scale", weight_scale) - set_weight_attrs(weight_scale, extra_weight_attrs) - set_weight_attrs(weight_scale, { - "shard_indexer": self.scales_shard_indexer, - }) - - # in_scale = Parameter( - # torch.empty( - # len(output_partition_sizes), - # device='cuda', dtype=torch.float32, - # ), requires_grad=False - # ) - # layer.register_parameter("in_scale", in_scale) - # set_weight_attrs(in_scale, extra_weight_attrs) - # set_weight_attrs(in_scale, { - # "shard_indexer": self.scales_shard_indexer, - # }) - - layer.logical_widths = output_partition_sizes - - def shard_id_as_int( - self, - shard_id: Union[str, int] - ) -> int: - if isinstance(shard_id, int): - return shard_id - assert isinstance(shard_id, str) - qkv_idxs = { "q": 0, "k": 1, "v": 2 } - assert shard_id in qkv_idxs - return qkv_idxs[shard_id] - - # def scales_shard_splitter_NKK( - # self, - # param: torch.Tensor, - # loaded_weight: torch.Tensor, - # shard_id: Union[str, int], - # logical_widths: torch.Tensor - # ) -> Tuple[torch.Tensor, torch.Tensor]: - # shard_id = self.shard_id_as_int(shard_id) - # offset = sum(logical_widths[:shard_id]) - # size = logical_widths[shard_id] - # # update loaded weight with copies for broadcast. - # loaded_weight = loaded_weight.repeat(size) - # return param[offset : offset + size], loaded_weight - - def scales_shard_indexer( - self, - param: torch.Tensor, - loaded_weight: torch.Tensor, - shard_id: Union[str, int], - ) -> Tuple[torch.Tensor, torch.Tensor]: - # print(f"----- shard_id: {shard_id}") - # print(f"----- loaded_weight: {loaded_weight}") - return param[self.shard_id_as_int(shard_id)], loaded_weight - - def apply_weights(self, - layer: torch.nn.Module, - x: torch.Tensor, - bias: Optional[torch.Tensor] = None) -> torch.Tensor: - logical_widths = layer.logical_widths - q_weight = layer.weight - w_scales = layer.weight_scale - - qinput, x_scale = per_tensor_quantize(x) - - output = torch.zeros(x.shape[0], q_weight.shape[0], dtype=x.dtype, device="cuda") - - # FOR LOOP TO BE REPLACED BY CUTLASS KERNEL - start_offset = 0 - for _, (logical_width, w_scale) in enumerate(zip(logical_widths, w_scales)): - end_offset = start_offset + logical_width - q_weight = layer.weight[start_offset:end_offset, :].t() - - out, _ = torch._scaled_mm( - qinput, - q_weight, - out_dtype=x.dtype, - scale_a=x_scale, - scale_b=w_scale, - bias=bias, - ) - output[:, start_offset:end_offset] = out - start_offset = end_offset - - assert end_offset == output.shape[1] - return output - - -def per_tensor_quantize(tensor: torch.Tensor) -> tuple[torch.Tensor, float]: - """Quantize a tensor using per-tensor static scaling factor. - Args: - tensor: The input tensor. 
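The per_tensor_quantize helper being deleted here reappears unchanged in the new fp8_serialized module later in the series, so a tiny worked example of the scheme is worth pinning down. This is a sketch with made-up values; it assumes a PyTorch build that ships torch.float8_e4m3fn (maximum representable value 448.0):

    import torch

    x = torch.tensor([[-3.0, 0.5, 2.0]])
    finfo = torch.finfo(torch.float8_e4m3fn)

    # The scale maps the tensor's absmax onto the FP8-E4M3 maximum,
    # exactly as the aminmax-based code here does.
    scale = finfo.max / x.abs().max().clamp(min=1e-12)   # 448.0 / 3.0
    xq = (x * scale).clamp(finfo.min, finfo.max).to(torch.float8_e4m3fn)

    # torch._scaled_mm consumes the *inverse* scale, which is why the
    # helper returns scale.reciprocal() rather than scale itself.
    inv_scale = scale.reciprocal()
    x_dq = xq.to(torch.float32) * inv_scale              # recovers x up to FP8 rounding
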
- """ - finfo = torch.finfo(torch.float8_e4m3fn) - # Calculate the scale as dtype max divided by absmax. - # Since .abs() creates a new tensor, we use aminmax to get - # the min and max first and then calculate the absmax. - min_val, max_val = tensor.aminmax() - amax = min_val.abs().max(max_val.abs()) - scale = finfo.max / amax.clamp(min=1e-12) - # scale and clamp the tensor to bring it to - # the representative range of float8 data type - # (as default cast is unsaturated) - qweight = (tensor * scale).clamp(min=finfo.min, max=finfo.max) - # Return both float8 data and the inverse scale (as float), - # as both required as inputs to torch._scaled_mm - qweight = qweight.to(torch.float8_e4m3fn) - scale = scale.float().reciprocal() - return qweight, scale \ No newline at end of file From 7e3933b96fb390cbd7845c76b8dc4ad8083401ff Mon Sep 17 00:00:00 2001 From: Robert Shaw Date: Wed, 24 Apr 2024 13:16:53 +0000 Subject: [PATCH 15/90] nit --- run_fp8.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/run_fp8.py b/run_fp8.py index 84bef6578301e..16b1fab255293 100644 --- a/run_fp8.py +++ b/run_fp8.py @@ -9,4 +9,4 @@ outputs = model.generate(prompt) generation = outputs[0].outputs[0].text -print(f"----- Generation: {generation}") \ No newline at end of file +print(f"----- Generation: {generation}") From 453a236cd05bdfea722bca52a1777a584954e1ab Mon Sep 17 00:00:00 2001 From: Robert Shaw Date: Wed, 24 Apr 2024 13:22:47 +0000 Subject: [PATCH 16/90] cleanup --- .buildkite/test-pipeline.yaml | 3 --- tests/quantization/test_fp8.py | 9 --------- 2 files changed, 12 deletions(-) diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index 11cda053260ec..f7c1569696249 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -96,9 +96,6 @@ steps: - label: Metrics Test command: pytest -v -s metrics -- label: Quantization Test - command: pytest -v -s quantization - - label: Benchmarks working_dir: "/vllm-workspace/.buildkite" commands: diff --git a/tests/quantization/test_fp8.py b/tests/quantization/test_fp8.py index 6e38d570f8541..43300a72a86c7 100644 --- a/tests/quantization/test_fp8.py +++ b/tests/quantization/test_fp8.py @@ -12,15 +12,6 @@ capability = torch.cuda.get_device_capability() capability = capability[0] * 10 + capability[1] -@pytest.mark.skipif( - capability < QUANTIZATION_METHODS["fp8_static"].get_min_capability(), - reason="FP8 is not supported on this GPU type.") -def test_load_static_model(vllm_runner) -> None: - llm = vllm_runner("FriendliAI/Llama-2-7b-chat-hf-fp8", quantization="fp8_static", enforce_eager=True) - model = llm.model.llm_engine.model_executor.driver_worker.model_runner.model - fc1 = model.model.decoder.layers[0].fc1 - assert isinstance(fc1.linear_method, Fp8LinearMethod) - print(llm.generate("Hello my name is", SamplingParams(max_tokens=20))) @pytest.mark.skipif( capability < QUANTIZATION_METHODS["fp8"].get_min_capability(), From 310e0a7ad0fb1383a9980ee47d0cb0a8766848da Mon Sep 17 00:00:00 2001 From: Robert Shaw Date: Wed, 24 Apr 2024 13:23:06 +0000 Subject: [PATCH 17/90] cleanup --- tests/quantization/test_fp8.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/quantization/test_fp8.py b/tests/quantization/test_fp8.py index 43300a72a86c7..fa10e60de10a7 100644 --- a/tests/quantization/test_fp8.py +++ b/tests/quantization/test_fp8.py @@ -5,7 +5,6 @@ import pytest import torch -from vllm import SamplingParams from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS from 
vllm.model_executor.layers.quantization.fp8 import Fp8LinearMethod From ab4cb0209a2425a39b249379dc92ae5727b2d909 Mon Sep 17 00:00:00 2001 From: Robert Shaw Date: Wed, 24 Apr 2024 13:23:47 +0000 Subject: [PATCH 18/90] missed file :) --- .../layers/quantization/fp8_serialized.py | 166 ++++++++++++++++++ 1 file changed, 166 insertions(+) create mode 100644 vllm/model_executor/layers/quantization/fp8_serialized.py diff --git a/vllm/model_executor/layers/quantization/fp8_serialized.py b/vllm/model_executor/layers/quantization/fp8_serialized.py new file mode 100644 index 0000000000000..933a70a7d6d10 --- /dev/null +++ b/vllm/model_executor/layers/quantization/fp8_serialized.py @@ -0,0 +1,166 @@ +from typing import Any, Dict, List, Optional, Tuple, Union + +import torch +from torch.nn.parameter import Parameter + +from vllm.model_executor.layers.linear import (LinearMethodBase, + set_weight_attrs) +from vllm.model_executor.layers.quantization.base_config import ( + QuantizationConfig) + + +class FP8SerializedConfig(QuantizationConfig): + """Config class for FP8.""" + + @classmethod + def get_name(cls) -> str: + return "fp8_static" + + @classmethod + def get_supported_act_dtypes(cls) -> List[torch.dtype]: + return [torch.bfloat16, torch.half] + + @classmethod + def get_min_capability(cls) -> int: + return 90 + + @classmethod + def get_config_filenames(cls) -> List[str]: + return [] + + @classmethod + def from_config(cls, config: Dict[str, Any]) -> "FP8SerializedConfig": + return cls() + + def get_linear_method(self) -> "Fp8SerializedLinearMethod": + return Fp8SerializedLinearMethod(self) + + def get_scaled_act_names(self) -> List[str]: + return [] + + +class Fp8SerializedLinearMethod(LinearMethodBase): + """Linear method for StaticFP8 + . + Args: + quant_config: The quantization config. + """ + + def __init__(self, quant_config: FP8SerializedConfig): + self.quant_config = quant_config + + def create_weights( + self, + layer: torch.nn.Module, + input_size_per_partition: int, + output_partition_sizes: List[int], + input_size: int, + output_size: int, + params_dtype: torch.dtype, + **extra_weight_attrs, + ): + del input_size, output_size, params_dtype + + weight = Parameter(torch.empty(sum(output_partition_sizes), + input_size_per_partition, + dtype=torch.float8_e4m3fn), + requires_grad=False) + layer.register_parameter("weight", weight) + set_weight_attrs(weight, {"input_dim": 1, "output_dim": 0}) + set_weight_attrs(weight, extra_weight_attrs) + + weight_scale = Parameter( + torch.empty( + len(output_partition_sizes), + device='cuda', dtype=torch.float32, + ), requires_grad=False + ) + layer.register_parameter("weight_scale", weight_scale) + set_weight_attrs(weight_scale, extra_weight_attrs) + set_weight_attrs(weight_scale, { + "shard_indexer": self.scales_shard_indexer, + }) + + layer.logical_widths = output_partition_sizes + + def shard_id_as_int( + self, + shard_id: Union[str, int] + ) -> int: + if isinstance(shard_id, int): + return shard_id + assert isinstance(shard_id, str) + qkv_idxs = { "q": 0, "k": 1, "v": 2 } + assert shard_id in qkv_idxs + return qkv_idxs[shard_id] + + # def scales_shard_splitter_NKK( + # self, + # param: torch.Tensor, + # loaded_weight: torch.Tensor, + # shard_id: Union[str, int], + # logical_widths: torch.Tensor + # ) -> Tuple[torch.Tensor, torch.Tensor]: + # shard_id = self.shard_id_as_int(shard_id) + # offset = sum(logical_widths[:shard_id]) + # size = logical_widths[shard_id] + # # update loaded weight with copies for broadcast. 
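Two layouts for the per-shard scales are visible at this point in the file: the commented-out splitter would repeat one scale across every row of its shard, while the scales_shard_indexer kept below stores a single scalar per logical shard. A toy sketch of that bookkeeping for a fused QKV projection, with invented scale values:

    import torch

    qkv_idxs = {"q": 0, "k": 1, "v": 2}                 # mirrors shard_id_as_int
    weight_scale = torch.empty(3, dtype=torch.float32)  # one slot per logical shard

    for shard_id, loaded in [("q", torch.tensor(0.02)),
                             ("k", torch.tensor(0.03)),
                             ("v", torch.tensor(0.05))]:
        weight_scale[qkv_idxs[shard_id]] = loaded       # param[shard_id_as_int(...)] <- scale

Each slot then scales its own slice of the fused output, which is what the per-shard torch._scaled_mm loop in apply_weights below relies on.
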
+ # loaded_weight = loaded_weight.repeat(size) + # return param[offset : offset + size], loaded_weight + + def scales_shard_indexer( + self, + param: torch.Tensor, + loaded_weight: torch.Tensor, + shard_id: Union[str, int], + ) -> Tuple[torch.Tensor, torch.Tensor]: + return param[self.shard_id_as_int(shard_id)], loaded_weight + + def apply_weights(self, + layer: torch.nn.Module, + x: torch.Tensor, + bias: Optional[torch.Tensor] = None) -> torch.Tensor: + + qinput, x_scale = per_tensor_quantize(x) + + # FOR LOOP TO BE REPLACED BY CUTLASS KERNEL W/ EPILOGUE FUSION + output = torch.zeros(x.shape[0], layer.weight.shape[0], dtype=x.dtype, device="cuda") + start_offset = 0 + for _, (logical_width, w_scale) in enumerate(zip(layer.logical_widths, layer.weight_scale)): + end_offset = start_offset + logical_width + + out, _ = torch._scaled_mm( + qinput, + layer.weight[start_offset:end_offset, :].t(), + out_dtype=x.dtype, + scale_a=x_scale, + scale_b=w_scale, + bias=bias, + ) + output[:, start_offset:end_offset] = out + start_offset = end_offset + + return output + + +def per_tensor_quantize(tensor: torch.Tensor) -> tuple[torch.Tensor, float]: + """Quantize a tensor using per-tensor static scaling factor. + Args: + tensor: The input tensor. + """ + finfo = torch.finfo(torch.float8_e4m3fn) + # Calculate the scale as dtype max divided by absmax. + # Since .abs() creates a new tensor, we use aminmax to get + # the min and max first and then calculate the absmax. + min_val, max_val = tensor.aminmax() + amax = min_val.abs().max(max_val.abs()) + scale = finfo.max / amax.clamp(min=1e-12) + # scale and clamp the tensor to bring it to + # the representative range of float8 data type + # (as default cast is unsaturated) + qweight = (tensor * scale).clamp(min=finfo.min, max=finfo.max) + # Return both float8 data and the inverse scale (as float), + # as both required as inputs to torch._scaled_mm + qweight = qweight.to(torch.float8_e4m3fn) + scale = scale.float().reciprocal() + return qweight, scale From 2edd93afe360be2de69f23497b16436ba28673a9 Mon Sep 17 00:00:00 2001 From: Robert Shaw <114415538+robertgshaw2-neuralmagic@users.noreply.github.com> Date: Wed, 24 Apr 2024 10:07:48 -0400 Subject: [PATCH 19/90] Update fp8.py --- vllm/model_executor/layers/quantization/fp8.py | 1 - 1 file changed, 1 deletion(-) diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py index 983a63e124ae8..01e494c870e71 100644 --- a/vllm/model_executor/layers/quantization/fp8.py +++ b/vllm/model_executor/layers/quantization/fp8.py @@ -112,7 +112,6 @@ def apply_weights(self, scale_b=layer.weight_scaling_factor, bias=bias, ) - return output From ccee5d337df4455587c099890f93e977972d3c4f Mon Sep 17 00:00:00 2001 From: Philipp Moritz Date: Wed, 24 Apr 2024 13:46:35 -0700 Subject: [PATCH 20/90] Implement static scaling for Mixtral --- csrc/ops.h | 7 ++++- csrc/pybind.cpp | 3 +- csrc/quantization/fp8/fp8_cuda_kernels.cu | 25 +++++++++++++++- vllm/_custom_ops.py | 9 ++++-- .../layers/fused_moe/fused_moe.py | 12 +++++--- .../model_executor/layers/quantization/fp8.py | 11 +++++-- vllm/model_executor/models/mixtral.py | 30 ++++++++++++++++++- 7 files changed, 84 insertions(+), 13 deletions(-) diff --git a/csrc/ops.h b/csrc/ops.h index ff7a3de1a0a8c..03bb1e24dc68e 100644 --- a/csrc/ops.h +++ b/csrc/ops.h @@ -146,7 +146,12 @@ void gptq_shuffle( torch::Tensor q_perm, int bit); -void scaled_fp8_quant( +void static_scaled_fp8_quant( + torch::Tensor& out, + torch::Tensor& input, + torch::Tensor& 
scale); + +void dynamic_scaled_fp8_quant( torch::Tensor& out, torch::Tensor& input, torch::Tensor& scale); diff --git a/csrc/pybind.cpp b/csrc/pybind.cpp index a5b16c5abc3ed..2250c7f69f0ab 100644 --- a/csrc/pybind.cpp +++ b/csrc/pybind.cpp @@ -73,7 +73,8 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { ops.def("gptq_gemm", &gptq_gemm, "Quantized GEMM for GPTQ"); ops.def("gptq_shuffle", &gptq_shuffle, "Post processing for GPTQ"); ops.def("squeezellm_gemm", &squeezellm_gemm, "Quantized GEMM for SqueezeLLM"); - ops.def("scaled_fp8_quant", &scaled_fp8_quant, "Compute FP8 quantized tensor and scaling factor"); + ops.def("static_scaled_fp8_quant", &static_scaled_fp8_quant, "Compute FP8 quantized tensor for given scaling factor"); + ops.def("dynamic_scaled_fp8_quant", &dynamic_scaled_fp8_quant, "Compute FP8 quantized tensor and scaling factor"); ops.def( "moe_align_block_size", &moe_align_block_size, diff --git a/csrc/quantization/fp8/fp8_cuda_kernels.cu b/csrc/quantization/fp8/fp8_cuda_kernels.cu index c3337cede1282..2477051eb60d7 100644 --- a/csrc/quantization/fp8/fp8_cuda_kernels.cu +++ b/csrc/quantization/fp8/fp8_cuda_kernels.cu @@ -74,7 +74,30 @@ __global__ void scaled_fp8_quant_kernel( } // namespace vllm -void scaled_fp8_quant( +void static_scaled_fp8_quant( + torch::Tensor& out, // [..., d] + torch::Tensor& input, // [..., d] + torch::Tensor& scale) // [1] +{ + int64_t num_tokens = input.numel() / input.size(-1); + int64_t num_elems = input.numel(); + dim3 grid(num_tokens); + dim3 block(1024); + const at::cuda::OptionalCUDAGuard device_guard(device_of(input)); + const cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + VLLM_DISPATCH_FLOATING_TYPES( + input.scalar_type(), + "scaled_fp8_quant_kernel", + [&] { + vllm::scaled_fp8_quant_kernel<<>>( + out.data_ptr(), + input.data_ptr(), + scale.data_ptr(), + num_elems); + }); +} + +void dynamic_scaled_fp8_quant( torch::Tensor& out, // [..., d] torch::Tensor& input, // [..., d] torch::Tensor& scale) // [1] diff --git a/vllm/_custom_ops.py b/vllm/_custom_ops.py index e4b16ed918d1a..73e7b723edb31 100644 --- a/vllm/_custom_ops.py +++ b/vllm/_custom_ops.py @@ -154,10 +154,13 @@ def marlin_gemm(a: torch.Tensor, b_q_weight: torch.Tensor, # fp8 -def scaled_fp8_quant(input: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: - scale = torch.zeros(1, device=input.device, dtype=torch.float32) +def scaled_fp8_quant(input: torch.Tensor, scale: Optional[torch.Tensor]) -> Tuple[torch.Tensor, torch.Tensor]: output = torch.empty_like(input, dtype=torch.float8_e4m3fn) - vllm_ops.scaled_fp8_quant(output, input, scale) + if scale: + vllm_ops.static_scaled_fp8_quant(output, input, scale) + else: + scale = torch.zeros(1, device=input.device, dtype=torch.float32) + vllm_ops.dynamic_scaled_fp8_quant(output, input, scale) return output, scale diff --git a/vllm/model_executor/layers/fused_moe/fused_moe.py b/vllm/model_executor/layers/fused_moe/fused_moe.py index ac7c30e2a9727..b77732cff22ad 100644 --- a/vllm/model_executor/layers/fused_moe/fused_moe.py +++ b/vllm/model_executor/layers/fused_moe/fused_moe.py @@ -220,8 +220,8 @@ def moe_align_block_size( def invoke_fused_moe_kernel(A: torch.Tensor, B: torch.Tensor, C: torch.Tensor, - B_scale: torch.Tensor, topk_weights: torch.Tensor, - topk_ids: torch.Tensor, + A_scale: Optional[torch.Tensor], B_scale: torch.Tensor, + topk_weights: torch.Tensor, topk_ids: torch.Tensor, sorted_token_ids: torch.Tensor, expert_ids: torch.Tensor, num_tokens_post_padded: torch.Tensor, @@ -232,10 +232,10 @@ def invoke_fused_moe_kernel(A: 
torch.Tensor, B: torch.Tensor, C: torch.Tensor, assert sorted_token_ids.stride(0) == 1 if not use_fp8: - A_scale = None + assert A_scale is None assert B_scale is None else: - A, A_scale = ops.scaled_fp8_quant(A) + A, A_scale = ops.scaled_fp8_quant(A, A_scale) assert B_scale is not None grid = lambda META: (triton.cdiv(sorted_token_ids.shape[0], META[ @@ -318,6 +318,8 @@ def fused_moe( use_fp8: bool = False, w1_scale: Optional[torch.Tensor] = None, w2_scale: Optional[torch.Tensor] = None, + a1_scale: Optional[torch.Tensor] = None, + a2_scale: Optional[torch.Tensor] = None, ) -> torch.Tensor: """ This function computes a Mixture of Experts (MoE) layer using two sets of @@ -434,6 +436,7 @@ def fused_moe( invoke_fused_moe_kernel(hidden_states, w1, intermediate_cache1, + a1_scale, w1_scale, topk_weights, topk_ids, @@ -451,6 +454,7 @@ def fused_moe( invoke_fused_moe_kernel(intermediate_cache2, w2, intermediate_cache3, + a2_scale, w2_scale, topk_weights, topk_ids, diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py index 01e494c870e71..633021fda894b 100644 --- a/vllm/model_executor/layers/quantization/fp8.py +++ b/vllm/model_executor/layers/quantization/fp8.py @@ -13,6 +13,12 @@ class FP8Config(QuantizationConfig): """Config class for FP8.""" + def __init__( + self, + act_scaling: str="dynamic", + ) -> None: + self.act_scaling = act_scaling + @classmethod def get_name(cls) -> str: return "fp8" @@ -30,11 +36,12 @@ def get_min_capability(cls) -> int: @classmethod def get_config_filenames(cls) -> List[str]: - return [] + return ["quantize_config.json"] @classmethod def from_config(cls, config: Dict[str, Any]) -> "FP8Config": - return cls() + act_scaling = cls.get_from_keys(config, ["act_scaling"]) + return cls(act_scaling) def get_linear_method(self) -> "Fp8LinearMethod": return Fp8LinearMethod(self) diff --git a/vllm/model_executor/models/mixtral.py b/vllm/model_executor/models/mixtral.py index a33b795d7088e..951c44ca24622 100644 --- a/vllm/model_executor/models/mixtral.py +++ b/vllm/model_executor/models/mixtral.py @@ -114,12 +114,27 @@ def __init__( self.num_total_experts, device="cuda", dtype=torch.float32), requires_grad=False) if self.use_fp8 else None + # Scaling factors for FP8 activations + static_act_scaling = self.use_fp8 and linear_method.act_scaling == "static" + self.as_scale = nn.Parameter( + torch.zeros(1, device="cuda", dtype=torch.float32), + requires_grad=False) if static_act_scaling else None + self.a2s_scale = nn.Parameter( + torch.zeros(1, device="cuda", dtype=torch.float32), + requires_grad=False) if static_act_scaling else None + set_weight_attrs(self.ws, { "weight_loader": self.weight_loader, }) set_weight_attrs(self.w2s, { "weight_loader": self.weight_loader, }) + set_weight_attrs(self.as_scale, { + "weight_loader": self.weight_loader, + }) + set_weight_attrs(self.a2s_scale, { + "weight_loader": self.weight_loader, + }) def weight_loader(self, param: nn.Parameter, loaded_weight: torch.Tensor, weight_name: str, expert_id: int): @@ -134,6 +149,9 @@ def weight_loader(self, param: nn.Parameter, loaded_weight: torch.Tensor, shard_size:2 * shard_size, :] = loaded_weight[shard, :] if weight_name.endswith("w2.weight"): param_data[expert_id, :, :] = loaded_weight[:, shard] + if "activation_scale" in weight_name: + param_data[:] = param_data[:].max(loaded_weight) + print("loaded scale", weight_name, param_data) def process_weights_after_loading(self): if self.use_fp8: @@ -161,7 +179,9 @@ def forward(self, hidden_states: 
torch.Tensor) -> torch.Tensor: inplace=True, use_fp8=self.use_fp8, w1_scale=self.ws_scale, - w2_scale=self.w2s_scale) + w2_scale=self.w2s_scale, + a1_scale=self.a1_scale, + a2_scale=self.a2_scale) if self.tp_size > 1: final_hidden_states = tensor_model_parallel_all_reduce( @@ -443,11 +463,19 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): ] expert_params_mapping = [ + # These are the weights for the experts # (param_name, weight_name, expert_id) ("ws" if weight_name in ["w1", "w3"] else "w2s", f"experts.{expert_id}.{weight_name}.weight", expert_id) for expert_id in range(self.config.num_local_experts) for weight_name in ["w1", "w2", "w3"] + ] + [ + # These are the activation scales for the experts + # (param_name, weight_name, expert_id) + ("a_scale" if activation_name in ["a1", "a3"] else "a2_scale", + f"experts.{expert_id}.{activation_name}.activation_scale", expert_id) + for expert_id in range(self.config.num_local_experts) + for activation_name in ["a1", "a2", "a3"] ] params_dict = dict(self.named_parameters()) From 8f71c795314101187aa4aa65c128a73781ef14a4 Mon Sep 17 00:00:00 2001 From: Philipp Moritz Date: Wed, 24 Apr 2024 14:01:32 -0700 Subject: [PATCH 21/90] fix --- vllm/model_executor/models/mixtral.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vllm/model_executor/models/mixtral.py b/vllm/model_executor/models/mixtral.py index 951c44ca24622..6a52db56b50b9 100644 --- a/vllm/model_executor/models/mixtral.py +++ b/vllm/model_executor/models/mixtral.py @@ -115,13 +115,13 @@ def __init__( requires_grad=False) if self.use_fp8 else None # Scaling factors for FP8 activations - static_act_scaling = self.use_fp8 and linear_method.act_scaling == "static" + need_act_scales = not self.use_fp8 or linear_method.act_scaling == "static" self.as_scale = nn.Parameter( torch.zeros(1, device="cuda", dtype=torch.float32), - requires_grad=False) if static_act_scaling else None + requires_grad=False) if need_act_scales else None self.a2s_scale = nn.Parameter( torch.zeros(1, device="cuda", dtype=torch.float32), - requires_grad=False) if static_act_scaling else None + requires_grad=False) if need_act_scales else None set_weight_attrs(self.ws, { "weight_loader": self.weight_loader, From 6eb01e07977b7a93348dc5b3247a5b3f369f7720 Mon Sep 17 00:00:00 2001 From: Philipp Moritz Date: Wed, 24 Apr 2024 14:07:48 -0700 Subject: [PATCH 22/90] update --- vllm/model_executor/model_loader/weight_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/model_executor/model_loader/weight_utils.py b/vllm/model_executor/model_loader/weight_utils.py index 9995f2afe3cf7..2cf4cd9f5c642 100644 --- a/vllm/model_executor/model_loader/weight_utils.py +++ b/vllm/model_executor/model_loader/weight_utils.py @@ -147,7 +147,7 @@ def get_quant_config(model_config: ModelConfig, f for f in config_files if any( f.endswith(x) for x in possible_config_filenames) ] - if len(quant_config_files) == 0: + if len(quant_config_files) == 0 and "" not in possible_config_filenames: raise ValueError( f"Cannot find the config file for {model_config.quantization}") if len(quant_config_files) > 1: From dc89cbc364abbcd471c547381de53cb72bd76c1b Mon Sep 17 00:00:00 2001 From: Philipp Moritz Date: Wed, 24 Apr 2024 14:16:39 -0700 Subject: [PATCH 23/90] fix --- vllm/model_executor/layers/quantization/fp8.py | 2 ++ vllm/model_executor/model_loader/weight_utils.py | 13 ++++++------- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/vllm/model_executor/layers/quantization/fp8.py 
b/vllm/model_executor/layers/quantization/fp8.py index 633021fda894b..7c0e0e9e8e45b 100644 --- a/vllm/model_executor/layers/quantization/fp8.py +++ b/vllm/model_executor/layers/quantization/fp8.py @@ -13,6 +13,8 @@ class FP8Config(QuantizationConfig): """Config class for FP8.""" + config_file_optional = True + def __init__( self, act_scaling: str="dynamic", diff --git a/vllm/model_executor/model_loader/weight_utils.py b/vllm/model_executor/model_loader/weight_utils.py index 2cf4cd9f5c642..8fc85da86748c 100644 --- a/vllm/model_executor/model_loader/weight_utils.py +++ b/vllm/model_executor/model_loader/weight_utils.py @@ -135,19 +135,18 @@ def get_quant_config(model_config: ModelConfig, else: hf_folder = model_name_or_path - possible_config_filenames = quant_cls.get_config_filenames() - - # If the quantization config is not found, use the default config. - if not possible_config_filenames: - return quant_cls() - config_files = glob.glob(os.path.join(hf_folder, "*.json")) quant_config_files = [ f for f in config_files if any( f.endswith(x) for x in possible_config_filenames) ] - if len(quant_config_files) == 0 and "" not in possible_config_filenames: + + possible_config_filenames = quant_cls.get_config_filenames() + # If the quantization config is optional and not provided, use the default config. + if quant_cls.getattr("config_file_optional", False) and not quant_config_files: + return quant_cls() + if len(quant_config_files) == 0: raise ValueError( f"Cannot find the config file for {model_config.quantization}") if len(quant_config_files) > 1: From be60845acd14b0639fc565a7aa954ad98df5c7b3 Mon Sep 17 00:00:00 2001 From: Philipp Moritz Date: Wed, 24 Apr 2024 14:18:08 -0700 Subject: [PATCH 24/90] update --- vllm/model_executor/model_loader/weight_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/model_executor/model_loader/weight_utils.py b/vllm/model_executor/model_loader/weight_utils.py index 8fc85da86748c..df5b8fd80d1a3 100644 --- a/vllm/model_executor/model_loader/weight_utils.py +++ b/vllm/model_executor/model_loader/weight_utils.py @@ -136,13 +136,13 @@ def get_quant_config(model_config: ModelConfig, hf_folder = model_name_or_path config_files = glob.glob(os.path.join(hf_folder, "*.json")) + possible_config_filenames = quant_cls.get_config_filenames() quant_config_files = [ f for f in config_files if any( f.endswith(x) for x in possible_config_filenames) ] - possible_config_filenames = quant_cls.get_config_filenames() # If the quantization config is optional and not provided, use the default config. if quant_cls.getattr("config_file_optional", False) and not quant_config_files: return quant_cls() From 4613cb562f4df81e651b798d408fb9bfab067640 Mon Sep 17 00:00:00 2001 From: Philipp Moritz Date: Wed, 24 Apr 2024 14:19:52 -0700 Subject: [PATCH 25/90] update --- vllm/model_executor/model_loader/weight_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/model_executor/model_loader/weight_utils.py b/vllm/model_executor/model_loader/weight_utils.py index df5b8fd80d1a3..f3e0411c54336 100644 --- a/vllm/model_executor/model_loader/weight_utils.py +++ b/vllm/model_executor/model_loader/weight_utils.py @@ -144,7 +144,7 @@ def get_quant_config(model_config: ModelConfig, ] # If the quantization config is optional and not provided, use the default config. 
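To make the optional-config path above concrete: for FP8 the checkpoint may ship a quantize_config.json as small as {"act_scaling": "static"}, or omit the file entirely. A sketch of both paths, using the FP8Config surface introduced earlier (the one-key schema is all from_config reads; treating the file as a flat JSON dict is an assumption carried over from the other quantization methods):

    from vllm.model_executor.layers.quantization.fp8 import FP8Config

    # Checkpoint ships a quantize_config.json; the parsed dict reaches from_config:
    static_cfg = FP8Config.from_config({"act_scaling": "static"})

    # No config file at all; the fallback constructs the default, which
    # selects dynamic (computed per forward pass) activation scaling:
    default_cfg = FP8Config()
    assert default_cfg.act_scaling == "dynamic"
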
- if quant_cls.getattr("config_file_optional", False) and not quant_config_files: + if getattr(quant_cls, "config_file_optional", False) and not quant_config_files: return quant_cls() if len(quant_config_files) == 0: raise ValueError( From 3d95d86e040fa643f110f952d42376ca26a702ce Mon Sep 17 00:00:00 2001 From: Philipp Moritz Date: Wed, 24 Apr 2024 14:21:41 -0700 Subject: [PATCH 26/90] fix --- vllm/model_executor/models/mixtral.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/model_executor/models/mixtral.py b/vllm/model_executor/models/mixtral.py index 6a52db56b50b9..c818764ff6799 100644 --- a/vllm/model_executor/models/mixtral.py +++ b/vllm/model_executor/models/mixtral.py @@ -115,7 +115,7 @@ def __init__( requires_grad=False) if self.use_fp8 else None # Scaling factors for FP8 activations - need_act_scales = not self.use_fp8 or linear_method.act_scaling == "static" + need_act_scales = not self.use_fp8 or linear_method.quant_config.act_scaling == "static" self.as_scale = nn.Parameter( torch.zeros(1, device="cuda", dtype=torch.float32), requires_grad=False) if need_act_scales else None From 642763fd5a54bf442b17fdcebbc27dabc398144a Mon Sep 17 00:00:00 2001 From: Philipp Moritz Date: Wed, 24 Apr 2024 14:23:42 -0700 Subject: [PATCH 27/90] move --- vllm/model_executor/models/mixtral.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/vllm/model_executor/models/mixtral.py b/vllm/model_executor/models/mixtral.py index c818764ff6799..7260d1e7667ea 100644 --- a/vllm/model_executor/models/mixtral.py +++ b/vllm/model_executor/models/mixtral.py @@ -104,6 +104,13 @@ def __init__( device="cuda", dtype=self.params_dtype)) + set_weight_attrs(self.ws, { + "weight_loader": self.weight_loader, + }) + set_weight_attrs(self.w2s, { + "weight_loader": self.weight_loader, + }) + # Scaling factors for FP8 weights self.ws_scale = nn.Parameter( torch.ones( @@ -123,18 +130,13 @@ def __init__( torch.zeros(1, device="cuda", dtype=torch.float32), requires_grad=False) if need_act_scales else None - set_weight_attrs(self.ws, { - "weight_loader": self.weight_loader, - }) - set_weight_attrs(self.w2s, { - "weight_loader": self.weight_loader, - }) - set_weight_attrs(self.as_scale, { - "weight_loader": self.weight_loader, - }) - set_weight_attrs(self.a2s_scale, { - "weight_loader": self.weight_loader, - }) + if need_act_scales: + set_weight_attrs(self.as_scale, { + "weight_loader": self.weight_loader, + }) + set_weight_attrs(self.a2s_scale, { + "weight_loader": self.weight_loader, + }) def weight_loader(self, param: nn.Parameter, loaded_weight: torch.Tensor, weight_name: str, expert_id: int): From 706e9317ef3c4f3ee715df3b0f0571a832073d11 Mon Sep 17 00:00:00 2001 From: Philipp Moritz Date: Wed, 24 Apr 2024 14:27:04 -0700 Subject: [PATCH 28/90] update --- vllm/model_executor/models/mixtral.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/model_executor/models/mixtral.py b/vllm/model_executor/models/mixtral.py index 7260d1e7667ea..9f4673b59c673 100644 --- a/vllm/model_executor/models/mixtral.py +++ b/vllm/model_executor/models/mixtral.py @@ -182,8 +182,8 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: use_fp8=self.use_fp8, w1_scale=self.ws_scale, w2_scale=self.w2s_scale, - a1_scale=self.a1_scale, - a2_scale=self.a2_scale) + a1_scale=self.as_scale, + a2_scale=self.a2s_scale) if self.tp_size > 1: final_hidden_states = tensor_model_parallel_all_reduce( From 9a3c78ccbb06504400709d1148d1b9c34afd3295 Mon Sep 17 
00:00:00 2001 From: Philipp Moritz Date: Wed, 24 Apr 2024 14:31:18 -0700 Subject: [PATCH 29/90] lol --- vllm/model_executor/models/mixtral.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/model_executor/models/mixtral.py b/vllm/model_executor/models/mixtral.py index 9f4673b59c673..e4a0ba7902682 100644 --- a/vllm/model_executor/models/mixtral.py +++ b/vllm/model_executor/models/mixtral.py @@ -122,7 +122,7 @@ def __init__( requires_grad=False) if self.use_fp8 else None # Scaling factors for FP8 activations - need_act_scales = not self.use_fp8 or linear_method.quant_config.act_scaling == "static" + need_act_scales = self.use_fp8 and linear_method.quant_config.act_scaling == "static" self.as_scale = nn.Parameter( torch.zeros(1, device="cuda", dtype=torch.float32), requires_grad=False) if need_act_scales else None From 1b6f0201c60c3e75bcfc45d96f9c32d7f1754c72 Mon Sep 17 00:00:00 2001 From: Philipp Moritz Date: Wed, 24 Apr 2024 15:15:06 -0700 Subject: [PATCH 30/90] fix cuda graph --- vllm/_custom_ops.py | 14 +++++++------ .../layers/fused_moe/fused_moe.py | 21 +++++++++++-------- vllm/model_executor/models/mixtral.py | 16 +++++++------- 3 files changed, 28 insertions(+), 23 deletions(-) diff --git a/vllm/_custom_ops.py b/vllm/_custom_ops.py index 73e7b723edb31..0a18714da0e5c 100644 --- a/vllm/_custom_ops.py +++ b/vllm/_custom_ops.py @@ -154,13 +154,15 @@ def marlin_gemm(a: torch.Tensor, b_q_weight: torch.Tensor, # fp8 -def scaled_fp8_quant(input: torch.Tensor, scale: Optional[torch.Tensor]) -> Tuple[torch.Tensor, torch.Tensor]: +def static_scaled_fp8_quant(input: torch.Tensor, scale: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: output = torch.empty_like(input, dtype=torch.float8_e4m3fn) - if scale: - vllm_ops.static_scaled_fp8_quant(output, input, scale) - else: - scale = torch.zeros(1, device=input.device, dtype=torch.float32) - vllm_ops.dynamic_scaled_fp8_quant(output, input, scale) + vllm_ops.static_scaled_fp8_quant(output, input, scale) + return output + +def dynamic_scaled_fp8_quant(input: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: + output = torch.empty_like(input, dtype=torch.float8_e4m3fn) + scale = torch.zeros(1, device=input.device, dtype=torch.float32) + vllm_ops.dynamic_scaled_fp8_quant(output, input, scale) return output, scale diff --git a/vllm/model_executor/layers/fused_moe/fused_moe.py b/vllm/model_executor/layers/fused_moe/fused_moe.py index b77732cff22ad..556ecc5f30868 100644 --- a/vllm/model_executor/layers/fused_moe/fused_moe.py +++ b/vllm/model_executor/layers/fused_moe/fused_moe.py @@ -227,15 +227,18 @@ def invoke_fused_moe_kernel(A: torch.Tensor, B: torch.Tensor, C: torch.Tensor, num_tokens_post_padded: torch.Tensor, mul_routed_weight: bool, top_k: int, config: Dict[str, Any], compute_type: tl.dtype, - use_fp8: bool) -> None: + linear_method: Optional[LinearMethodBase]) -> None: assert topk_weights.stride(1) == 1 assert sorted_token_ids.stride(0) == 1 - if not use_fp8: + if not isinstance(linear_method, Fp8LinearMethod): assert A_scale is None assert B_scale is None - else: - A, A_scale = ops.scaled_fp8_quant(A, A_scale) + elif linear_method.quant_config.act_scaling == "static": + A = ops.static_scaled_fp8_quant(A, A_scale) + assert B_scale is not None + elif linear_method.quant_config.act_scaling == "dynamic": + A, A_scale = ops.dynamic_scaled_fp8_quant(A) assert B_scale is not None grid = lambda META: (triton.cdiv(sorted_token_ids.shape[0], META[ @@ -265,7 +268,7 @@ def invoke_fused_moe_kernel(A: torch.Tensor, B: torch.Tensor, C: 
torch.Tensor, MUL_ROUTED_WEIGHT=mul_routed_weight, top_k=top_k, compute_type=compute_type, - use_fp8=use_fp8, + use_fp8=isinstance(linear_method, Fp8LinearMethod), **config, ) @@ -315,7 +318,7 @@ def fused_moe( renormalize: bool, inplace: bool = False, override_config: Optional[Dict[str, Any]] = None, - use_fp8: bool = False, + linear_method: Optional[LinearMethodBase] = None, w1_scale: Optional[torch.Tensor] = None, w2_scale: Optional[torch.Tensor] = None, a1_scale: Optional[torch.Tensor] = None, @@ -397,7 +400,7 @@ def fused_moe( else: # First try to load optimal config from the file configs = get_moe_configs(E, w2.shape[2], - "float8" if use_fp8 else None) + "float8" if isinstance(linear_method, Fp8LinearMethod) else None) if configs: # If an optimal configuration map has been found, look up the @@ -447,7 +450,7 @@ def fused_moe( topk_ids.shape[1], config, compute_type=tl.float16, - use_fp8=use_fp8) + linear_method=linear_method) ops.silu_and_mul(intermediate_cache2, intermediate_cache1.view(-1, N)) @@ -465,7 +468,7 @@ def fused_moe( 1, config, compute_type=tl.float16, - use_fp8=use_fp8) + linear_method=linear_method) if inplace: return torch.sum(intermediate_cache3.view(*intermediate_cache3.shape), diff --git a/vllm/model_executor/models/mixtral.py b/vllm/model_executor/models/mixtral.py index e4a0ba7902682..ca051e8fa813c 100644 --- a/vllm/model_executor/models/mixtral.py +++ b/vllm/model_executor/models/mixtral.py @@ -77,9 +77,7 @@ def __init__( self.top_k = top_k self.hidden_size = hidden_size self.intermediate_size = intermediate_size // self.tp_size - # FIXME(pcmoritz): Make this more general to support different - # quantization schemes - self.use_fp8 = isinstance(linear_method, Fp8LinearMethod) + self.linear_method = linear_method if params_dtype is None: params_dtype = torch.get_default_dtype() @@ -111,18 +109,20 @@ def __init__( "weight_loader": self.weight_loader, }) + use_fp8 = isinstance(linear_method, Fp8LinearMethod) + # Scaling factors for FP8 weights self.ws_scale = nn.Parameter( torch.ones( self.num_total_experts, device="cuda", dtype=torch.float32), - requires_grad=False) if self.use_fp8 else None + requires_grad=False) if use_fp8 else None self.w2s_scale = nn.Parameter( torch.ones( self.num_total_experts, device="cuda", dtype=torch.float32), - requires_grad=False) if self.use_fp8 else None + requires_grad=False) if use_fp8 else None # Scaling factors for FP8 activations - need_act_scales = self.use_fp8 and linear_method.quant_config.act_scaling == "static" + need_act_scales = use_fp8 and linear_method.quant_config.act_scaling == "static" self.as_scale = nn.Parameter( torch.zeros(1, device="cuda", dtype=torch.float32), requires_grad=False) if need_act_scales else None @@ -156,7 +156,7 @@ def weight_loader(self, param: nn.Parameter, loaded_weight: torch.Tensor, print("loaded scale", weight_name, param_data) def process_weights_after_loading(self): - if self.use_fp8: + if isinstance(self.linear_method, Fp8LinearMethod): ws = torch.empty_like(self.ws.data, dtype=torch.float8_e4m3fn) w2s = torch.empty_like(self.w2s.data, dtype=torch.float8_e4m3fn) for expert in range(self.num_total_experts): @@ -179,7 +179,7 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: self.top_k, renormalize=True, inplace=True, - use_fp8=self.use_fp8, + linear_method=self.linear_method, w1_scale=self.ws_scale, w2_scale=self.w2s_scale, a1_scale=self.as_scale, From b09bcecc6ff019ce5a4b3ece92d9e591832d1f5a Mon Sep 17 00:00:00 2001 From: Philipp Moritz Date: Wed, 24 Apr 2024 15:20:17 
-0700 Subject: [PATCH 31/90] fix --- vllm/model_executor/layers/fused_moe/fused_moe.py | 1 + 1 file changed, 1 insertion(+) diff --git a/vllm/model_executor/layers/fused_moe/fused_moe.py b/vllm/model_executor/layers/fused_moe/fused_moe.py index 556ecc5f30868..dab54a385b119 100644 --- a/vllm/model_executor/layers/fused_moe/fused_moe.py +++ b/vllm/model_executor/layers/fused_moe/fused_moe.py @@ -10,6 +10,7 @@ from vllm import _custom_ops as ops from vllm.logger import init_logger +from vllm.model_executor.layers.linear import LinearMethodBase from vllm.utils import is_hip logger = init_logger(__name__) From 052e2b3b83e6760739e2101252f38ed198784029 Mon Sep 17 00:00:00 2001 From: Philipp Moritz Date: Wed, 24 Apr 2024 15:22:17 -0700 Subject: [PATCH 32/90] update --- vllm/model_executor/layers/fused_moe/fused_moe.py | 1 + 1 file changed, 1 insertion(+) diff --git a/vllm/model_executor/layers/fused_moe/fused_moe.py b/vllm/model_executor/layers/fused_moe/fused_moe.py index dab54a385b119..db22c38011c82 100644 --- a/vllm/model_executor/layers/fused_moe/fused_moe.py +++ b/vllm/model_executor/layers/fused_moe/fused_moe.py @@ -11,6 +11,7 @@ from vllm import _custom_ops as ops from vllm.logger import init_logger from vllm.model_executor.layers.linear import LinearMethodBase +from vllm.model_executor.layers.quantization.fp8 import Fp8LinearMethod from vllm.utils import is_hip logger = init_logger(__name__) From b33c6d7a5208564735b6f6eeeeaed88b51a7be30 Mon Sep 17 00:00:00 2001 From: Philipp Moritz Date: Wed, 24 Apr 2024 18:46:25 -0700 Subject: [PATCH 33/90] update --- vllm/model_executor/models/mixtral.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vllm/model_executor/models/mixtral.py b/vllm/model_executor/models/mixtral.py index ca051e8fa813c..c233cd0d9b065 100644 --- a/vllm/model_executor/models/mixtral.py +++ b/vllm/model_executor/models/mixtral.py @@ -474,10 +474,10 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): ] + [ # These are the activation scales for the experts # (param_name, weight_name, expert_id) - ("a_scale" if activation_name in ["a1", "a3"] else "a2_scale", - f"experts.{expert_id}.{activation_name}.activation_scale", expert_id) + ("as_scale" if weight_name in ["w1", "w3"] else "a2s_scale", + f"experts.{expert_id}.{weight_name}.activation_scale", expert_id) for expert_id in range(self.config.num_local_experts) - for activation_name in ["a1", "a2", "a3"] + for weight_name in ["w1", "w2", "w3"] ] params_dict = dict(self.named_parameters()) From 475f58d9a25a2cc0de04dcfbcba733590aeafd11 Mon Sep 17 00:00:00 2001 From: Philipp Moritz Date: Wed, 24 Apr 2024 18:56:02 -0700 Subject: [PATCH 34/90] refactor --- vllm/model_executor/layers/quantization/base_config.py | 6 ++++++ vllm/model_executor/layers/quantization/fp8.py | 6 ++++-- vllm/model_executor/model_loader/weight_utils.py | 2 +- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/vllm/model_executor/layers/quantization/base_config.py b/vllm/model_executor/layers/quantization/base_config.py index 6115e7c3be956..08061f83b5a38 100644 --- a/vllm/model_executor/layers/quantization/base_config.py +++ b/vllm/model_executor/layers/quantization/base_config.py @@ -29,6 +29,12 @@ def get_min_capability(self) -> int: """ raise NotImplementedError + # The following is not an abstract method and returns True by default. 
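To summarize the hook this comment introduces (its pieces are spread over three hunks in this patch), here is a condensed sketch of the resulting contract; the *Sketch class names are invented for illustration, the real classes live in the files named in the diffs:

    class QuantizationConfigSketch:
        @classmethod
        def require_config_file(cls) -> bool:
            return True                  # conservative default for all methods

    class FP8ConfigSketch(QuantizationConfigSketch):
        @classmethod
        def require_config_file(cls) -> bool:
            return False                 # fp8 can run from built-in defaults

    def get_quant_config_sketch(quant_cls, quant_config_files):
        # Mirrors the weight_utils.py change below: having no config file
        # is fine whenever the method does not require one.
        if not quant_cls.require_config_file() and not quant_config_files:
            return quant_cls()
        ...

Compared with the config_file_optional class attribute from a few patches back, an overridable classmethod keeps the loader free of getattr probing and gives every QuantizationConfig subclass the same typed hook.
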
+    @classmethod
+    def require_config_file(cls) -> bool:
+        """Whether this quantization config needs a configuration file."""
+        return True
+
     @staticmethod
     @abstractmethod
     def get_config_filenames() -> List[str]:
diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py
index 7c0e0e9e8e45b..d5e37cbd7bd5e 100644
--- a/vllm/model_executor/layers/quantization/fp8.py
+++ b/vllm/model_executor/layers/quantization/fp8.py
@@ -13,8 +13,6 @@
 class FP8Config(QuantizationConfig):
     """Config class for FP8."""
 
-    config_file_optional = True
-
     def __init__(
         self,
         act_scaling: str="dynamic",
@@ -36,6 +34,10 @@ def get_min_capability(cls) -> int:
         # be included: https://github.com/pytorch/pytorch/pull/118881
         return 90
 
+    @classmethod
+    def require_config_file(cls) -> bool:
+        return False
+
     @classmethod
     def get_config_filenames(cls) -> List[str]:
         return ["quantize_config.json"]
diff --git a/vllm/model_executor/model_loader/weight_utils.py b/vllm/model_executor/model_loader/weight_utils.py
index f3e0411c54336..70879f91bdc23 100644
--- a/vllm/model_executor/model_loader/weight_utils.py
+++ b/vllm/model_executor/model_loader/weight_utils.py
@@ -144,7 +144,7 @@ def get_quant_config(model_config: ModelConfig,
     ]
 
     # If the quantization config is optional and not provided, use the default config.
-    if getattr(quant_cls, "config_file_optional", False) and not quant_config_files:
+    if not quant_cls.require_config_file() and not quant_config_files:
         return quant_cls()
     if len(quant_config_files) == 0:
         raise ValueError(
             f"Cannot find the config file for {model_config.quantization}")

From 56b4880b47e4911bebaabe788eaeba08812eaa1b Mon Sep 17 00:00:00 2001
From: Philipp Moritz
Date: Wed, 24 Apr 2024 19:16:27 -0700
Subject: [PATCH 35/90] update

---
 vllm/_custom_ops.py                               | 14 ++++++--------
 vllm/model_executor/layers/fused_moe/fused_moe.py |  7 ++-----
 2 files changed, 8 insertions(+), 13 deletions(-)

diff --git a/vllm/_custom_ops.py b/vllm/_custom_ops.py
index 0a18714da0e5c..b871b8e3a11d9 100644
--- a/vllm/_custom_ops.py
+++ b/vllm/_custom_ops.py
@@ -154,15 +154,13 @@ def marlin_gemm(a: torch.Tensor, b_q_weight: torch.Tensor,
 
 # fp8
-def static_scaled_fp8_quant(input: torch.Tensor, scale: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
+def scaled_fp8_quant(input: torch.Tensor, scale: Optional[torch.Tensor]=None) -> Tuple[torch.Tensor, torch.Tensor]:
     output = torch.empty_like(input, dtype=torch.float8_e4m3fn)
-    vllm_ops.static_scaled_fp8_quant(output, input, scale)
-    return output
-
-def dynamic_scaled_fp8_quant(input: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
-    output = torch.empty_like(input, dtype=torch.float8_e4m3fn)
-    scale = torch.zeros(1, device=input.device, dtype=torch.float32)
-    vllm_ops.dynamic_scaled_fp8_quant(output, input, scale)
+    if scale is None:
+        scale = torch.zeros(1, device=input.device, dtype=torch.float32)
+        vllm_ops.dynamic_scaled_fp8_quant(output, input, scale)
+    else:
+        vllm_ops.static_scaled_fp8_quant(output, input, scale)
     return output, scale

diff --git a/vllm/model_executor/layers/fused_moe/fused_moe.py b/vllm/model_executor/layers/fused_moe/fused_moe.py
index db22c38011c82..6fe94d4e268c3 100644
--- a/vllm/model_executor/layers/fused_moe/fused_moe.py
+++ b/vllm/model_executor/layers/fused_moe/fused_moe.py
@@ -236,11 +236,8 @@ def invoke_fused_moe_kernel(A: torch.Tensor, B: torch.Tensor, C: torch.Tensor,
     if not isinstance(linear_method, Fp8LinearMethod):
         assert A_scale is None
         assert B_scale is None
-    elif linear_method.quant_config.act_scaling == "static":
-        A = ops.static_scaled_fp8_quant(A, A_scale)
-
assert B_scale is not None - elif linear_method.quant_config.act_scaling == "dynamic": - A, A_scale = ops.dynamic_scaled_fp8_quant(A) + else: + A, A_scale = ops.scaled_fp8_quant(A, A_scale) assert B_scale is not None grid = lambda META: (triton.cdiv(sorted_token_ids.shape[0], META[ From be371549899fd3e169bb8d3f1730c27e83ecf760 Mon Sep 17 00:00:00 2001 From: Philipp Moritz Date: Wed, 24 Apr 2024 19:27:40 -0700 Subject: [PATCH 36/90] revert --- .../model_executor/layers/fused_moe/fused_moe.py | 16 +++++++--------- vllm/model_executor/models/mixtral.py | 16 ++++++++-------- 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/vllm/model_executor/layers/fused_moe/fused_moe.py b/vllm/model_executor/layers/fused_moe/fused_moe.py index 6fe94d4e268c3..b77732cff22ad 100644 --- a/vllm/model_executor/layers/fused_moe/fused_moe.py +++ b/vllm/model_executor/layers/fused_moe/fused_moe.py @@ -10,8 +10,6 @@ from vllm import _custom_ops as ops from vllm.logger import init_logger -from vllm.model_executor.layers.linear import LinearMethodBase -from vllm.model_executor.layers.quantization.fp8 import Fp8LinearMethod from vllm.utils import is_hip logger = init_logger(__name__) @@ -229,11 +227,11 @@ def invoke_fused_moe_kernel(A: torch.Tensor, B: torch.Tensor, C: torch.Tensor, num_tokens_post_padded: torch.Tensor, mul_routed_weight: bool, top_k: int, config: Dict[str, Any], compute_type: tl.dtype, - linear_method: Optional[LinearMethodBase]) -> None: + use_fp8: bool) -> None: assert topk_weights.stride(1) == 1 assert sorted_token_ids.stride(0) == 1 - if not isinstance(linear_method, Fp8LinearMethod): + if not use_fp8: assert A_scale is None assert B_scale is None else: @@ -267,7 +265,7 @@ def invoke_fused_moe_kernel(A: torch.Tensor, B: torch.Tensor, C: torch.Tensor, MUL_ROUTED_WEIGHT=mul_routed_weight, top_k=top_k, compute_type=compute_type, - use_fp8=isinstance(linear_method, Fp8LinearMethod), + use_fp8=use_fp8, **config, ) @@ -317,7 +315,7 @@ def fused_moe( renormalize: bool, inplace: bool = False, override_config: Optional[Dict[str, Any]] = None, - linear_method: Optional[LinearMethodBase] = None, + use_fp8: bool = False, w1_scale: Optional[torch.Tensor] = None, w2_scale: Optional[torch.Tensor] = None, a1_scale: Optional[torch.Tensor] = None, @@ -399,7 +397,7 @@ def fused_moe( else: # First try to load optimal config from the file configs = get_moe_configs(E, w2.shape[2], - "float8" if isinstance(linear_method, Fp8LinearMethod) else None) + "float8" if use_fp8 else None) if configs: # If an optimal configuration map has been found, look up the @@ -449,7 +447,7 @@ def fused_moe( topk_ids.shape[1], config, compute_type=tl.float16, - linear_method=linear_method) + use_fp8=use_fp8) ops.silu_and_mul(intermediate_cache2, intermediate_cache1.view(-1, N)) @@ -467,7 +465,7 @@ def fused_moe( 1, config, compute_type=tl.float16, - linear_method=linear_method) + use_fp8=use_fp8) if inplace: return torch.sum(intermediate_cache3.view(*intermediate_cache3.shape), diff --git a/vllm/model_executor/models/mixtral.py b/vllm/model_executor/models/mixtral.py index c233cd0d9b065..9baf176fc78ee 100644 --- a/vllm/model_executor/models/mixtral.py +++ b/vllm/model_executor/models/mixtral.py @@ -77,7 +77,9 @@ def __init__( self.top_k = top_k self.hidden_size = hidden_size self.intermediate_size = intermediate_size // self.tp_size - self.linear_method = linear_method + # FIXME(pcmoritz): Make this more general to support different + # quantization schemes + self.use_fp8 = isinstance(linear_method, Fp8LinearMethod) if 
params_dtype is None: params_dtype = torch.get_default_dtype() @@ -109,20 +111,18 @@ def __init__( "weight_loader": self.weight_loader, }) - use_fp8 = isinstance(linear_method, Fp8LinearMethod) - # Scaling factors for FP8 weights self.ws_scale = nn.Parameter( torch.ones( self.num_total_experts, device="cuda", dtype=torch.float32), - requires_grad=False) if use_fp8 else None + requires_grad=False) if self.use_fp8 else None self.w2s_scale = nn.Parameter( torch.ones( self.num_total_experts, device="cuda", dtype=torch.float32), - requires_grad=False) if use_fp8 else None + requires_grad=False) if self.use_fp8 else None # Scaling factors for FP8 activations - need_act_scales = use_fp8 and linear_method.quant_config.act_scaling == "static" + need_act_scales = self.use_fp8 and linear_method.quant_config.act_scaling == "static" self.as_scale = nn.Parameter( torch.zeros(1, device="cuda", dtype=torch.float32), requires_grad=False) if need_act_scales else None @@ -156,7 +156,7 @@ def weight_loader(self, param: nn.Parameter, loaded_weight: torch.Tensor, print("loaded scale", weight_name, param_data) def process_weights_after_loading(self): - if isinstance(self.linear_method, Fp8LinearMethod): + if self.use_fp8: ws = torch.empty_like(self.ws.data, dtype=torch.float8_e4m3fn) w2s = torch.empty_like(self.w2s.data, dtype=torch.float8_e4m3fn) for expert in range(self.num_total_experts): @@ -179,7 +179,7 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: self.top_k, renormalize=True, inplace=True, - linear_method=self.linear_method, + use_fp8=self.use_fp8, w1_scale=self.ws_scale, w2_scale=self.w2s_scale, a1_scale=self.as_scale, From 9c54d1923876d661f93d470a539903415036735f Mon Sep 17 00:00:00 2001 From: Philipp Moritz Date: Wed, 24 Apr 2024 19:35:27 -0700 Subject: [PATCH 37/90] format --- vllm/_custom_ops.py | 5 ++++- vllm/model_executor/layers/fused_moe/fused_moe.py | 5 +++-- vllm/model_executor/layers/quantization/fp8.py | 2 +- vllm/model_executor/model_loader/weight_utils.py | 4 +++- vllm/model_executor/models/mixtral.py | 3 ++- 5 files changed, 13 insertions(+), 6 deletions(-) diff --git a/vllm/_custom_ops.py b/vllm/_custom_ops.py index b871b8e3a11d9..c6f330f211e8a 100644 --- a/vllm/_custom_ops.py +++ b/vllm/_custom_ops.py @@ -154,7 +154,10 @@ def marlin_gemm(a: torch.Tensor, b_q_weight: torch.Tensor, # fp8 -def scaled_fp8_quant(input: torch.Tensor, scale: Optional[torch.Tensor]=None) -> Tuple[torch.Tensor, torch.Tensor]: +def scaled_fp8_quant( + input: torch.Tensor, + scale: Optional[torch.Tensor] = None +) -> Tuple[torch.Tensor, torch.Tensor]: output = torch.empty_like(input, dtype=torch.float8_e4m3fn) if scale is None: scale = torch.zeros(1, device=input.device, dtype=torch.float32) diff --git a/vllm/model_executor/layers/fused_moe/fused_moe.py b/vllm/model_executor/layers/fused_moe/fused_moe.py index b77732cff22ad..17b140195676e 100644 --- a/vllm/model_executor/layers/fused_moe/fused_moe.py +++ b/vllm/model_executor/layers/fused_moe/fused_moe.py @@ -220,8 +220,9 @@ def moe_align_block_size( def invoke_fused_moe_kernel(A: torch.Tensor, B: torch.Tensor, C: torch.Tensor, - A_scale: Optional[torch.Tensor], B_scale: torch.Tensor, - topk_weights: torch.Tensor, topk_ids: torch.Tensor, + A_scale: Optional[torch.Tensor], + B_scale: torch.Tensor, topk_weights: torch.Tensor, + topk_ids: torch.Tensor, sorted_token_ids: torch.Tensor, expert_ids: torch.Tensor, num_tokens_post_padded: torch.Tensor, diff --git a/vllm/model_executor/layers/quantization/fp8.py 
b/vllm/model_executor/layers/quantization/fp8.py index d5e37cbd7bd5e..ab562288a8f7d 100644 --- a/vllm/model_executor/layers/quantization/fp8.py +++ b/vllm/model_executor/layers/quantization/fp8.py @@ -15,7 +15,7 @@ class FP8Config(QuantizationConfig): def __init__( self, - act_scaling: str="dynamic", + act_scaling: str = "dynamic", ) -> None: self.act_scaling = act_scaling diff --git a/vllm/model_executor/model_loader/weight_utils.py b/vllm/model_executor/model_loader/weight_utils.py index 70879f91bdc23..54fd4f22f0eef 100644 --- a/vllm/model_executor/model_loader/weight_utils.py +++ b/vllm/model_executor/model_loader/weight_utils.py @@ -143,9 +143,11 @@ def get_quant_config(model_config: ModelConfig, f.endswith(x) for x in possible_config_filenames) ] - # If the quantization config is optional and not provided, use the default config. + # If the quantization config is optional and not provided, use the + # default config. if not quant_cls.require_config_file() and not quant_config_files: return quant_cls() + if len(quant_config_files) == 0: raise ValueError( f"Cannot find the config file for {model_config.quantization}") diff --git a/vllm/model_executor/models/mixtral.py b/vllm/model_executor/models/mixtral.py index 9baf176fc78ee..f42e1c939d723 100644 --- a/vllm/model_executor/models/mixtral.py +++ b/vllm/model_executor/models/mixtral.py @@ -122,7 +122,8 @@ def __init__( requires_grad=False) if self.use_fp8 else None # Scaling factors for FP8 activations - need_act_scales = self.use_fp8 and linear_method.quant_config.act_scaling == "static" + need_act_scales = (self.use_fp8 and + linear_method.quant_config.act_scaling == "static") self.as_scale = nn.Parameter( torch.zeros(1, device="cuda", dtype=torch.float32), requires_grad=False) if need_act_scales else None From c5155eab2f068dcd49a2fa78b0b198e0fc36582e Mon Sep 17 00:00:00 2001 From: Philipp Moritz Date: Wed, 24 Apr 2024 20:48:55 -0700 Subject: [PATCH 38/90] Update vllm/_custom_ops.py Co-authored-by: Woosuk Kwon --- vllm/_custom_ops.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/_custom_ops.py b/vllm/_custom_ops.py index c6f330f211e8a..9d40ec4928276 100644 --- a/vllm/_custom_ops.py +++ b/vllm/_custom_ops.py @@ -155,8 +155,8 @@ def marlin_gemm(a: torch.Tensor, b_q_weight: torch.Tensor, # fp8 def scaled_fp8_quant( - input: torch.Tensor, - scale: Optional[torch.Tensor] = None + input: torch.Tensor, + scale: Optional[torch.Tensor] = None, ) -> Tuple[torch.Tensor, torch.Tensor]: output = torch.empty_like(input, dtype=torch.float8_e4m3fn) if scale is None: From 948cca76dff98914d7ad08dea7eed17c10f2b933 Mon Sep 17 00:00:00 2001 From: Philipp Moritz Date: Wed, 24 Apr 2024 21:16:41 -0700 Subject: [PATCH 39/90] Update vllm/model_executor/layers/fused_moe/fused_moe.py Co-authored-by: Woosuk Kwon --- vllm/model_executor/layers/fused_moe/fused_moe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/model_executor/layers/fused_moe/fused_moe.py b/vllm/model_executor/layers/fused_moe/fused_moe.py index 17b140195676e..8e49c86202eb7 100644 --- a/vllm/model_executor/layers/fused_moe/fused_moe.py +++ b/vllm/model_executor/layers/fused_moe/fused_moe.py @@ -221,7 +221,7 @@ def moe_align_block_size( def invoke_fused_moe_kernel(A: torch.Tensor, B: torch.Tensor, C: torch.Tensor, A_scale: Optional[torch.Tensor], - B_scale: torch.Tensor, topk_weights: torch.Tensor, + B_scale: Optional[torch.Tensor], topk_weights: torch.Tensor, topk_ids: torch.Tensor, sorted_token_ids: torch.Tensor, expert_ids: torch.Tensor, From 
3feb88790dd5ab0d9d8244a92015a0ac27525387 Mon Sep 17 00:00:00 2001 From: Philipp Moritz Date: Wed, 24 Apr 2024 21:16:52 -0700 Subject: [PATCH 40/90] Update vllm/model_executor/models/mixtral.py Co-authored-by: Woosuk Kwon --- vllm/model_executor/models/mixtral.py | 1 - 1 file changed, 1 deletion(-) diff --git a/vllm/model_executor/models/mixtral.py b/vllm/model_executor/models/mixtral.py index f42e1c939d723..4439a75e5b57f 100644 --- a/vllm/model_executor/models/mixtral.py +++ b/vllm/model_executor/models/mixtral.py @@ -154,7 +154,6 @@ def weight_loader(self, param: nn.Parameter, loaded_weight: torch.Tensor, param_data[expert_id, :, :] = loaded_weight[:, shard] if "activation_scale" in weight_name: param_data[:] = param_data[:].max(loaded_weight) - print("loaded scale", weight_name, param_data) def process_weights_after_loading(self): if self.use_fp8: From df1631690c706b72df401316858884585167970f Mon Sep 17 00:00:00 2001 From: Philipp Moritz Date: Wed, 24 Apr 2024 21:44:13 -0700 Subject: [PATCH 41/90] format --- vllm/model_executor/layers/fused_moe/fused_moe.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/model_executor/layers/fused_moe/fused_moe.py b/vllm/model_executor/layers/fused_moe/fused_moe.py index 8e49c86202eb7..0cc59ae57d0d3 100644 --- a/vllm/model_executor/layers/fused_moe/fused_moe.py +++ b/vllm/model_executor/layers/fused_moe/fused_moe.py @@ -221,8 +221,8 @@ def moe_align_block_size( def invoke_fused_moe_kernel(A: torch.Tensor, B: torch.Tensor, C: torch.Tensor, A_scale: Optional[torch.Tensor], - B_scale: Optional[torch.Tensor], topk_weights: torch.Tensor, - topk_ids: torch.Tensor, + B_scale: Optional[torch.Tensor], + topk_weights: torch.Tensor, topk_ids: torch.Tensor, sorted_token_ids: torch.Tensor, expert_ids: torch.Tensor, num_tokens_post_padded: torch.Tensor, From 7b6b0fa7f9a04e77c4568a66c4da47e3fff1937d Mon Sep 17 00:00:00 2001 From: Robert Shaw Date: Thu, 25 Apr 2024 13:50:24 +0000 Subject: [PATCH 42/90] support static scales --- run_fp8.py | 2 +- .../layers/quantization/__init__.py | 5 +- .../layers/quantization/fp8_serialized.py | 60 +++++++++++++++---- 3 files changed, 52 insertions(+), 15 deletions(-) diff --git a/run_fp8.py b/run_fp8.py index 16b1fab255293..19f991902d486 100644 --- a/run_fp8.py +++ b/run_fp8.py @@ -1,7 +1,7 @@ from vllm import LLM from transformers import AutoTokenizer -model = LLM("nm-testing/mistral-fp8-test", tokenizer="mistralai/Mistral-7B-Instruct-v0.2", quantization="fp8_serialized", enforce_eager=True, max_model_len=1024) +model = LLM("nm-testing/mistral-fp8-static", enforce_eager=True, max_model_len=1024) tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2") prompt = tokenizer.apply_chat_template([{"role": "user", "content": "What is your name"}], tokenize=False, add_generation_prompt=True) diff --git a/vllm/model_executor/layers/quantization/__init__.py b/vllm/model_executor/layers/quantization/__init__.py index dbbe466dd7a94..060eaad31899b 100644 --- a/vllm/model_executor/layers/quantization/__init__.py +++ b/vllm/model_executor/layers/quantization/__init__.py @@ -4,8 +4,8 @@ from vllm.model_executor.layers.quantization.awq import AWQConfig from vllm.model_executor.layers.quantization.base_config import ( QuantizationConfig) -from vllm.model_executor.layers.quantization.fp8 import FP8Config -from vllm.model_executor.layers.quantization.fp8_serialized import FP8SerializedConfig +# from vllm.model_executor.layers.quantization.fp8 import FP8Config +from 
vllm.model_executor.layers.quantization.fp8_serialized import FP8Config from vllm.model_executor.layers.quantization.gptq import GPTQConfig from vllm.model_executor.layers.quantization.marlin import MarlinConfig from vllm.model_executor.layers.quantization.squeezellm import SqueezeLLMConfig @@ -14,7 +14,6 @@ "aqlm": AQLMConfig, "awq": AWQConfig, "fp8": FP8Config, - "fp8_serialized": FP8SerializedConfig, "gptq": GPTQConfig, "squeezellm": SqueezeLLMConfig, "marlin": MarlinConfig, diff --git a/vllm/model_executor/layers/quantization/fp8_serialized.py b/vllm/model_executor/layers/quantization/fp8_serialized.py index 933a70a7d6d10..c187bed817412 100644 --- a/vllm/model_executor/layers/quantization/fp8_serialized.py +++ b/vllm/model_executor/layers/quantization/fp8_serialized.py @@ -9,12 +9,18 @@ QuantizationConfig) -class FP8SerializedConfig(QuantizationConfig): +class FP8Config(QuantizationConfig): """Config class for FP8.""" + def __init__( + self, + scheme: str, + ) -> None: + assert scheme == "static" or scheme == "dynamic" + self.scheme = scheme @classmethod def get_name(cls) -> str: - return "fp8_static" + return "fp8" @classmethod def get_supported_act_dtypes(cls) -> List[torch.dtype]: @@ -29,24 +35,24 @@ def get_config_filenames(cls) -> List[str]: return [] @classmethod - def from_config(cls, config: Dict[str, Any]) -> "FP8SerializedConfig": - return cls() + def from_config(cls, config: Dict[str, Any]) -> "FP8Config": + scheme = cls.get_from_keys(config, ["scheme"]) + return cls(scheme=scheme) - def get_linear_method(self) -> "Fp8SerializedLinearMethod": - return Fp8SerializedLinearMethod(self) + def get_linear_method(self) -> "FP8LinearMethod": + return FP8LinearMethod(self) def get_scaled_act_names(self) -> List[str]: return [] - -class Fp8SerializedLinearMethod(LinearMethodBase): +class FP8LinearMethod(LinearMethodBase): """Linear method for StaticFP8 . Args: quant_config: The quantization config. """ - def __init__(self, quant_config: FP8SerializedConfig): + def __init__(self, quant_config: FP8Config): self.quant_config = quant_config def create_weights( @@ -81,6 +87,17 @@ def create_weights( "shard_indexer": self.scales_shard_indexer, }) + if self.quant_config.scheme == "static": + act_scale = Parameter( + torch.empty(len(output_partition_sizes), dtype=torch.float32), + requires_grad=False + ) + layer.register_parameter("act_scale", act_scale) + set_weight_attrs(act_scale, extra_weight_attrs) + set_weight_attrs(act_scale, { + "shard_indexer": self.scales_shard_indexer, + }) + layer.logical_widths = output_partition_sizes def shard_id_as_int( @@ -121,7 +138,14 @@ def apply_weights(self, x: torch.Tensor, bias: Optional[torch.Tensor] = None) -> torch.Tensor: - qinput, x_scale = per_tensor_quantize(x) + w_scale = layer.weight_scale.max() + + if self.quant_config.scheme == "dynamic": + qinput, x_scale = per_tensor_quantize_dyanmic(x) + elif self.quant_config.scheme == "static": + # empirically, these are all the same + x_scale = layer.act_scale.max() + qinput = per_tensor_quantize_static(x, x_scale) # FOR LOOP TO BE REPLACED BY CUTLASS KERNEL W/ EPILOGUE FUSION output = torch.zeros(x.shape[0], layer.weight.shape[0], dtype=x.dtype, device="cuda") @@ -143,8 +167,22 @@ def apply_weights(self, return output -def per_tensor_quantize(tensor: torch.Tensor) -> tuple[torch.Tensor, float]: +def per_tensor_quantize_static(tensor: torch.Tensor, inv_scale: float) -> torch.Tensor: """Quantize a tensor using per-tensor static scaling factor. + Args: + tensor: The input tensor. + inv_scale: The scale. 
+ """ + # Scale and clamp the tensor to bring it to + # the representative range of float8 data type + # (as default cast is unsaturated) + finfo = torch.finfo(torch.float8_e4m3fn) + qweight = (tensor / inv_scale).clamp(min=finfo.min, max=finfo.max) + return qweight.to(torch.float8_e4m3fn) + + +def per_tensor_quantize_dyanmic(tensor: torch.Tensor) -> tuple[torch.Tensor, float]: + """Quantize a tensor using per-tensor dynamic scaling factor. Args: tensor: The input tensor. """ From 1a3b2e1ead4ed17a00f8f22f70ce05d3f3b436b6 Mon Sep 17 00:00:00 2001 From: Robert Shaw Date: Thu, 25 Apr 2024 14:12:22 +0000 Subject: [PATCH 43/90] fixed example --- run_fp8.py | 35 ++++++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/run_fp8.py b/run_fp8.py index 19f991902d486..1ef12c3f352f9 100644 --- a/run_fp8.py +++ b/run_fp8.py @@ -1,12 +1,33 @@ from vllm import LLM from transformers import AutoTokenizer +import argparse -model = LLM("nm-testing/mistral-fp8-static", enforce_eager=True, max_model_len=1024) -tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2") +parser = argparse.ArgumentParser() +parser.add_argument("--type", choices=["static", "dynamic"]) -prompt = tokenizer.apply_chat_template([{"role": "user", "content": "What is your name"}], tokenize=False, add_generation_prompt=True) -print(f"----- Prompt: {prompt}") +if __name__ == "__main__": + args = parser.parse_args() + + if args.type == "static": + model_name = "nm-testing/mistral-fp8-static" + elif args.type == "dynamic": + model_name = "nm-testing/mistral-fp8-dynamic" + else: + raise ValueError("--type should be `static` or `dynamic`") -outputs = model.generate(prompt) -generation = outputs[0].outputs[0].text -print(f"----- Generation: {generation}") + tokenizer_name = "mistralai/Mistral-7B-Instruct-v0.2" + + model = LLM( + model_name, + tokenizer=tokenizer_name, + enforce_eager=True, + max_model_len=1024) + + tokenizer = AutoTokenizer.from_pretrained(tokenizer_name) + + prompt = tokenizer.apply_chat_template([{"role": "user", "content": "What is your name"}], tokenize=False, add_generation_prompt=True) + print(f"----- Prompt: {prompt}") + + outputs = model.generate(prompt) + generation = outputs[0].outputs[0].text + print(f"----- Generation: {generation}") From 63ad2ef747d046733fe1b671cded43b1beaba914 Mon Sep 17 00:00:00 2001 From: Robert Shaw <114415538+robertgshaw2-neuralmagic@users.noreply.github.com> Date: Thu, 25 Apr 2024 10:25:22 -0400 Subject: [PATCH 44/90] Delete quantize.ipynb --- quantize.ipynb | 250 ------------------------------------------------- 1 file changed, 250 deletions(-) delete mode 100644 quantize.ipynb diff --git a/quantize.ipynb b/quantize.ipynb deleted file mode 100644 index 17efdc17fd39b..0000000000000 --- a/quantize.ipynb +++ /dev/null @@ -1,250 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/paperspace/env/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n", - "Loading checkpoint shards: 100%|██████████| 3/3 [00:01<00:00, 1.94it/s]\n" - ] - } - ], - "source": [ - "import torch\n", - "from transformers import AutoModelForCausalLM\n", - "\n", - "model = AutoModelForCausalLM.from_pretrained(\n", - " \"mistralai/Mistral-7B-Instruct-v0.2\",\n", - " torch_dtype=torch.bfloat16,\n", - ")\n", - "model = model.to(\"cuda\")" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "from typing import Tuple\n", - "\n", - "def per_tensor_quantize(tensor: torch.Tensor) -> Tuple[torch.Tensor, float]:\n", - " \"\"\"Quantize a tensor using per-tensor static scaling factor.\n", - "\n", - " Args:\n", - " tensor: The input tensor.\n", - " \"\"\"\n", - " finfo = torch.finfo(torch.float8_e4m3fn)\n", - " # Calculate the scale as dtype max divided by absmax.\n", - " # Since .abs() creates a new tensor, we use aminmax to get\n", - " # the min and max first and then calculate the absmax.\n", - " min_val, max_val = tensor.aminmax()\n", - " amax = min_val.abs().max(max_val.abs())\n", - " scale = finfo.max / amax.clamp(min=1e-12)\n", - " # scale and clamp the tensor to bring it to\n", - " # the representative range of float8 data type\n", - " # (as default cast is unsaturated)\n", - " qweight = (tensor * scale).clamp(min=finfo.min, max=finfo.max)\n", - " # Return both float8 data and the inverse scale (as float),\n", - " # as both required as inputs to torch._scaled_mm\n", - " qweight = qweight.to(torch.float8_e4m3fn)\n", - " scale = scale.float().reciprocal()\n", - " return qweight, scale" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "class LinearFP8(torch.nn.Module):\n", - " def __init__(self, qweight, scale):\n", - " super().__init__()\n", - " self.weight = torch.nn.Parameter(qweight, requires_grad=False)\n", - " self.weight_scale = torch.nn.Parameter(scale, requires_grad=False)\n", - " \n", - " def forward(self, x):\n", - " shape = x.shape\n", - " x = x.reshape(-1, shape[-1])\n", - " qinput, x_scale = per_tensor_quantize(x)\n", - " \n", - " output, _ = torch._scaled_mm(\n", - " qinput,\n", - " self.weight.t(),\n", - " out_dtype=x.dtype,\n", - " scale_a=x_scale,\n", - " scale_b=self.weight_scale,\n", - " bias=None,\n", - " )\n", - " return output.reshape(shape[0], shape[1], -1)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "SELF_ATTN_WEIGHTS = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\"]\n", - "MLP_WEIGHTS = [\"gate_proj\", \"up_proj\", \"down_proj\"]\n", - "\n", - "def quantize_proj(module, proj_name):\n", - " proj = getattr(module, proj_name)\n", - " quant_weight, quant_scale = per_tensor_quantize(proj.weight)\n", - " quant_proj = LinearFP8(quant_weight, quant_scale)\n", - " \n", - " del proj\n", - " setattr(module, proj_name, quant_proj)\n", - "\n", - "for layer in model.model.layers:\n", - " for proj_name in SELF_ATTN_WEIGHTS:\n", - " quantize_proj(layer.self_attn, proj_name)\n", - " for proj_name in MLP_WEIGHTS:\n", - " quantize_proj(layer.mlp, proj_name)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "MistralForCausalLM(\n", - " (model): MistralModel(\n", - " (embed_tokens): Embedding(32000, 4096)\n", - " (layers): ModuleList(\n", - " (0-31): 32 x MistralDecoderLayer(\n", 
- " (self_attn): MistralSdpaAttention(\n", - " (q_proj): LinearFP8()\n", - " (k_proj): LinearFP8()\n", - " (v_proj): LinearFP8()\n", - " (o_proj): LinearFP8()\n", - " (rotary_emb): MistralRotaryEmbedding()\n", - " )\n", - " (mlp): MistralMLP(\n", - " (gate_proj): LinearFP8()\n", - " (up_proj): LinearFP8()\n", - " (down_proj): LinearFP8()\n", - " (act_fn): SiLU()\n", - " )\n", - " (input_layernorm): MistralRMSNorm()\n", - " (post_attention_layernorm): MistralRMSNorm()\n", - " )\n", - " )\n", - " (norm): MistralRMSNorm()\n", - " )\n", - " (lm_head): Linear(in_features=4096, out_features=32000, bias=False)\n", - ")" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "model" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "from transformers import AutoTokenizer\n", - "tokenizer = AutoTokenizer.from_pretrained(\n", - " \"mistralai/Mistral-7B-Instruct-v0.2\"\n", - ")\n", - "tokenizer.pad_token_id = tokenizer.eos_token_id" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "input_ids = tokenizer.apply_chat_template(\n", - " [{\"role\": \"user\", \"content\": \"What is your name?\" }],\n", - " return_tensors=\"pt\"\n", - ").to(\"cuda\")" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n", - "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " [INST] What is your name? [/INST] I don't have a name. 
I'm just a computer program designed to assist with information\n" - ] - } - ], - "source": [ - "output = model.generate(input_ids=input_ids, max_new_tokens=20)\n", - "print(tokenizer.decode(output[0]))" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "# hacked transformers/modeling_utils/dtype_byte_size to make this work\n", - "model.save_pretrained(\"mistral-fp8-static\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mod" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "env", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.7" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} From 794f1a185e179ed4e3a59e27e1d55cc0064e8283 Mon Sep 17 00:00:00 2001 From: Philipp Moritz Date: Thu, 25 Apr 2024 13:45:11 -0700 Subject: [PATCH 45/90] Update vllm/_custom_ops.py Co-authored-by: Woosuk Kwon --- vllm/_custom_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/_custom_ops.py b/vllm/_custom_ops.py index 9d40ec4928276..40fdb2cef380f 100644 --- a/vllm/_custom_ops.py +++ b/vllm/_custom_ops.py @@ -160,7 +160,7 @@ def scaled_fp8_quant( ) -> Tuple[torch.Tensor, torch.Tensor]: output = torch.empty_like(input, dtype=torch.float8_e4m3fn) if scale is None: - scale = torch.zeros(1, device=input.device, dtype=torch.float32) + scale = torch.empty(1, device=input.device, dtype=torch.float32) vllm_ops.dynamic_scaled_fp8_quant(output, input, scale) else: vllm_ops.static_scaled_fp8_quant(output, input, scale) From c13b6a49ac5ab715e6ca5d38d747ddfc68876018 Mon Sep 17 00:00:00 2001 From: Philipp Moritz Date: Thu, 25 Apr 2024 14:08:37 -0700 Subject: [PATCH 46/90] update --- vllm/model_executor/layers/quantization/fp8.py | 8 ++++---- vllm/model_executor/models/mixtral.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py index ab562288a8f7d..271cddc1175a5 100644 --- a/vllm/model_executor/layers/quantization/fp8.py +++ b/vllm/model_executor/layers/quantization/fp8.py @@ -15,9 +15,9 @@ class FP8Config(QuantizationConfig): def __init__( self, - act_scaling: str = "dynamic", + activation_scheme: str = "dynamic", ) -> None: - self.act_scaling = act_scaling + self.activation_scheme = activation_scheme @classmethod def get_name(cls) -> str: @@ -44,8 +44,8 @@ def get_config_filenames(cls) -> List[str]: @classmethod def from_config(cls, config: Dict[str, Any]) -> "FP8Config": - act_scaling = cls.get_from_keys(config, ["act_scaling"]) - return cls(act_scaling) + activation_scheme = cls.get_from_keys(config, ["activation_scheme"]) + return cls(activation_scheme) def get_linear_method(self) -> "Fp8LinearMethod": return Fp8LinearMethod(self) diff --git a/vllm/model_executor/models/mixtral.py b/vllm/model_executor/models/mixtral.py index 4439a75e5b57f..6f6c93b273335 100644 --- a/vllm/model_executor/models/mixtral.py +++ b/vllm/model_executor/models/mixtral.py @@ -123,7 +123,7 @@ def __init__( # Scaling factors for FP8 activations need_act_scales = (self.use_fp8 and - linear_method.quant_config.act_scaling == "static") + linear_method.quant_config.activation_scheme == "static") self.as_scale = nn.Parameter( 
torch.zeros(1, device="cuda", dtype=torch.float32), requires_grad=False) if need_act_scales else None From 5a230ed4794899bc20518f9cf48cc59ed40a97d4 Mon Sep 17 00:00:00 2001 From: Philipp Moritz Date: Thu, 25 Apr 2024 14:12:54 -0700 Subject: [PATCH 47/90] update --- vllm/model_executor/models/mixtral.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vllm/model_executor/models/mixtral.py b/vllm/model_executor/models/mixtral.py index 6f6c93b273335..c7199338b5525 100644 --- a/vllm/model_executor/models/mixtral.py +++ b/vllm/model_executor/models/mixtral.py @@ -123,7 +123,8 @@ def __init__( # Scaling factors for FP8 activations need_act_scales = (self.use_fp8 and - linear_method.quant_config.activation_scheme == "static") + linear_method.quant_config.activation_scheme + == "static") self.as_scale = nn.Parameter( torch.zeros(1, device="cuda", dtype=torch.float32), requires_grad=False) if need_act_scales else None From 80069c977123957a4e814dcb2485cb14cf5b3bcf Mon Sep 17 00:00:00 2001 From: Philipp Moritz Date: Thu, 25 Apr 2024 14:17:00 -0700 Subject: [PATCH 48/90] format --- vllm/model_executor/models/mixtral.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/model_executor/models/mixtral.py b/vllm/model_executor/models/mixtral.py index c7199338b5525..ad6f43fbd4aa0 100644 --- a/vllm/model_executor/models/mixtral.py +++ b/vllm/model_executor/models/mixtral.py @@ -122,8 +122,8 @@ def __init__( requires_grad=False) if self.use_fp8 else None # Scaling factors for FP8 activations - need_act_scales = (self.use_fp8 and - linear_method.quant_config.activation_scheme + need_act_scales = (self.use_fp8 + and linear_method.quant_config.activation_scheme == "static") self.as_scale = nn.Parameter( torch.zeros(1, device="cuda", dtype=torch.float32), From 5ce17d0cec2d7f43ebff895536f29ef356009534 Mon Sep 17 00:00:00 2001 From: Philipp Moritz Date: Thu, 25 Apr 2024 14:27:41 -0700 Subject: [PATCH 49/90] activation_scale -> act_scale --- vllm/model_executor/models/mixtral.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/model_executor/models/mixtral.py b/vllm/model_executor/models/mixtral.py index ad6f43fbd4aa0..dad1c43e1b0da 100644 --- a/vllm/model_executor/models/mixtral.py +++ b/vllm/model_executor/models/mixtral.py @@ -153,7 +153,7 @@ def weight_loader(self, param: nn.Parameter, loaded_weight: torch.Tensor, shard_size:2 * shard_size, :] = loaded_weight[shard, :] if weight_name.endswith("w2.weight"): param_data[expert_id, :, :] = loaded_weight[:, shard] - if "activation_scale" in weight_name: + if "act_scale" in weight_name: param_data[:] = param_data[:].max(loaded_weight) def process_weights_after_loading(self): @@ -476,7 +476,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): # These are the activation scales for the experts # (param_name, weight_name, expert_id) ("as_scale" if weight_name in ["w1", "w3"] else "a2s_scale", - f"experts.{expert_id}.{weight_name}.activation_scale", expert_id) + f"experts.{expert_id}.{weight_name}.act_scale", expert_id) for expert_id in range(self.config.num_local_experts) for weight_name in ["w1", "w2", "w3"] ] From 5fc033516381b4f36f6e4a2620352789edb8cf1b Mon Sep 17 00:00:00 2001 From: mgoin Date: Thu, 25 Apr 2024 21:48:33 +0000 Subject: [PATCH 50/90] Update scheme->activation_scheme --- .../layers/quantization/fp8_serialized.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/vllm/model_executor/layers/quantization/fp8_serialized.py 
b/vllm/model_executor/layers/quantization/fp8_serialized.py index c187bed817412..d9f21dab8fb27 100644 --- a/vllm/model_executor/layers/quantization/fp8_serialized.py +++ b/vllm/model_executor/layers/quantization/fp8_serialized.py @@ -13,10 +13,10 @@ class FP8Config(QuantizationConfig): """Config class for FP8.""" def __init__( self, - scheme: str, + activation_scheme: str, ) -> None: - assert scheme == "static" or scheme == "dynamic" - self.scheme = scheme + assert activation_scheme == "static" or activation_scheme == "dynamic" + self.activation_scheme = activation_scheme @classmethod def get_name(cls) -> str: @@ -36,8 +36,8 @@ def get_config_filenames(cls) -> List[str]: @classmethod def from_config(cls, config: Dict[str, Any]) -> "FP8Config": - scheme = cls.get_from_keys(config, ["scheme"]) - return cls(scheme=scheme) + activation_scheme = cls.get_from_keys(config, ["activation_scheme"]) + return cls(activation_scheme=activation_scheme) def get_linear_method(self) -> "FP8LinearMethod": return FP8LinearMethod(self) @@ -87,7 +87,7 @@ def create_weights( "shard_indexer": self.scales_shard_indexer, }) - if self.quant_config.scheme == "static": + if self.quant_config.activation_scheme == "static": act_scale = Parameter( torch.empty(len(output_partition_sizes), dtype=torch.float32), requires_grad=False @@ -140,9 +140,9 @@ def apply_weights(self, w_scale = layer.weight_scale.max() - if self.quant_config.scheme == "dynamic": + if self.quant_config.activation_scheme == "dynamic": qinput, x_scale = per_tensor_quantize_dyanmic(x) - elif self.quant_config.scheme == "static": + elif self.quant_config.activation_scheme == "static": # empirically, these are all the same x_scale = layer.act_scale.max() qinput = per_tensor_quantize_static(x, x_scale) From 92d5162ee6298f2557ded542d5157f7bf3db097d Mon Sep 17 00:00:00 2001 From: Philipp Moritz Date: Thu, 25 Apr 2024 14:50:36 -0700 Subject: [PATCH 51/90] fix dynamic scaling -- need init to zero due to atomic update --- vllm/_custom_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/_custom_ops.py b/vllm/_custom_ops.py index 40fdb2cef380f..9d40ec4928276 100644 --- a/vllm/_custom_ops.py +++ b/vllm/_custom_ops.py @@ -160,7 +160,7 @@ def scaled_fp8_quant( ) -> Tuple[torch.Tensor, torch.Tensor]: output = torch.empty_like(input, dtype=torch.float8_e4m3fn) if scale is None: - scale = torch.empty(1, device=input.device, dtype=torch.float32) + scale = torch.zeros(1, device=input.device, dtype=torch.float32) vllm_ops.dynamic_scaled_fp8_quant(output, input, scale) else: vllm_ops.static_scaled_fp8_quant(output, input, scale) From e1bfe1027e1726de51bb33fc6327623650d538a5 Mon Sep 17 00:00:00 2001 From: mgoin Date: Thu, 25 Apr 2024 22:05:19 +0000 Subject: [PATCH 52/90] Format --- run_fp8.py | 26 ++++++---- vllm/model_executor/layers/linear.py | 32 ++++++------ .../layers/quantization/fp8_serialized.py | 50 ++++++++++--------- 3 files changed, 58 insertions(+), 50 deletions(-) diff --git a/run_fp8.py b/run_fp8.py index 1ef12c3f352f9..113dca8401494 100644 --- a/run_fp8.py +++ b/run_fp8.py @@ -1,13 +1,15 @@ -from vllm import LLM +import argparse + from transformers import AutoTokenizer -import argparse + +from vllm import LLM parser = argparse.ArgumentParser() parser.add_argument("--type", choices=["static", "dynamic"]) if __name__ == "__main__": args = parser.parse_args() - + if args.type == "static": model_name = "nm-testing/mistral-fp8-static" elif args.type == "dynamic": @@ -17,15 +19,19 @@ tokenizer_name = "mistralai/Mistral-7B-Instruct-v0.2" - 
model = LLM( - model_name, - tokenizer=tokenizer_name, - enforce_eager=True, - max_model_len=1024) - + model = LLM(model_name, + tokenizer=tokenizer_name, + enforce_eager=True, + max_model_len=1024) + tokenizer = AutoTokenizer.from_pretrained(tokenizer_name) - prompt = tokenizer.apply_chat_template([{"role": "user", "content": "What is your name"}], tokenize=False, add_generation_prompt=True) + prompt = tokenizer.apply_chat_template([{ + "role": "user", + "content": "What is your name" + }], + tokenize=False, + add_generation_prompt=True) print(f"----- Prompt: {prompt}") outputs = model.generate(prompt) diff --git a/vllm/model_executor/layers/linear.py b/vllm/model_executor/layers/linear.py index 01af70a34c488..61670b34e16b4 100644 --- a/vllm/model_executor/layers/linear.py +++ b/vllm/model_executor/layers/linear.py @@ -285,7 +285,7 @@ def weight_loader(self, param_data = param.data output_dim = getattr(param, "output_dim", None) is_metadata = getattr(param, "is_metadata", False) - + # TODO: document. # TODO: sync with is_metadata. # For loading scales. @@ -293,14 +293,12 @@ def weight_loader(self, if output_dim is not None and shard_indexer is not None: raise NotImplementedError( "We do not currently support output_dim != None and " - "shard_indexer != None for a parameter. Please open an issue." - ) + "shard_indexer != None for a parameter. Please open an issue.") if loaded_shard_id is None and shard_indexer is not None: raise NotImplementedError( "We do not currently support loaded_shard_id == None and " - "shard_indexer != None for a parameter. Please open an issue." - ) - + "shard_indexer != None for a parameter. Please open an issue.") + if loaded_shard_id is None: # Loaded weight is already packed. if output_dim is None: @@ -358,13 +356,14 @@ def weight_loader(self, shard_size = loaded_weight.shape[0] shard_offset = loaded_shard_id * shard_size param_data = param_data.narrow(0, shard_offset, shard_size) - + # TODO: sync with is_metadata UX. # If a param_shard_splitter is defined by the LinearMethod, use it. elif shard_indexer is not None: - param_data, loaded_weight = shard_indexer( - param_data, loaded_weight, loaded_shard_id) - + param_data, loaded_weight = shard_indexer(param_data, + loaded_weight, + loaded_shard_id) + else: ignore_warning = getattr(param, "ignore_warning", False) if not ignore_warning: @@ -446,19 +445,17 @@ def weight_loader(self, param_data = param.data output_dim = getattr(param, "output_dim", None) is_metadata = getattr(param, "is_metadata", False) - + # TODO: sync with is_metadata UX shard_indexer = getattr(param, "shard_indexer", None) if output_dim is not None and shard_indexer is not None: raise NotImplementedError( "We do not currently support output_dim != None and " - "shard_indexer != None for a parameter. Please open an issue." - ) + "shard_indexer != None for a parameter. Please open an issue.") if loaded_shard_id is None and shard_indexer is not None: raise NotImplementedError( "We do not currently support loaded_shard_id == None and " - "shard_indexer != None for a parameter. Please open an issue." - ) + "shard_indexer != None for a parameter. Please open an issue.") if loaded_shard_id is None: # Loaded weight is already packed. @@ -535,8 +532,9 @@ def weight_loader(self, # TODO: sync with QKV # If a param_shard_splitter is defined by the LinearMethod, use it. 
elif shard_indexer is not None: - param_data, loaded_weight = shard_indexer( - param_data, loaded_weight, loaded_shard_id) + param_data, loaded_weight = shard_indexer(param_data, + loaded_weight, + loaded_shard_id) else: ignore_warning = getattr(param, "ignore_warning", False) if not ignore_warning: diff --git a/vllm/model_executor/layers/quantization/fp8_serialized.py b/vllm/model_executor/layers/quantization/fp8_serialized.py index d9f21dab8fb27..1179dc07a48d6 100644 --- a/vllm/model_executor/layers/quantization/fp8_serialized.py +++ b/vllm/model_executor/layers/quantization/fp8_serialized.py @@ -11,6 +11,7 @@ class FP8Config(QuantizationConfig): """Config class for FP8.""" + def __init__( self, activation_scheme: str, @@ -45,6 +46,7 @@ def get_linear_method(self) -> "FP8LinearMethod": def get_scaled_act_names(self) -> List[str]: return [] + class FP8LinearMethod(LinearMethodBase): """Linear method for StaticFP8 . @@ -66,7 +68,7 @@ def create_weights( **extra_weight_attrs, ): del input_size, output_size, params_dtype - + weight = Parameter(torch.empty(sum(output_partition_sizes), input_size_per_partition, dtype=torch.float8_e4m3fn), @@ -75,12 +77,12 @@ def create_weights( set_weight_attrs(weight, {"input_dim": 1, "output_dim": 0}) set_weight_attrs(weight, extra_weight_attrs) - weight_scale = Parameter( - torch.empty( - len(output_partition_sizes), - device='cuda', dtype=torch.float32, - ), requires_grad=False - ) + weight_scale = Parameter(torch.empty( + len(output_partition_sizes), + device='cuda', + dtype=torch.float32, + ), + requires_grad=False) layer.register_parameter("weight_scale", weight_scale) set_weight_attrs(weight_scale, extra_weight_attrs) set_weight_attrs(weight_scale, { @@ -88,10 +90,9 @@ def create_weights( }) if self.quant_config.activation_scheme == "static": - act_scale = Parameter( - torch.empty(len(output_partition_sizes), dtype=torch.float32), - requires_grad=False - ) + act_scale = Parameter(torch.empty(len(output_partition_sizes), + dtype=torch.float32), + requires_grad=False) layer.register_parameter("act_scale", act_scale) set_weight_attrs(act_scale, extra_weight_attrs) set_weight_attrs(act_scale, { @@ -100,14 +101,11 @@ def create_weights( layer.logical_widths = output_partition_sizes - def shard_id_as_int( - self, - shard_id: Union[str, int] - ) -> int: + def shard_id_as_int(self, shard_id: Union[str, int]) -> int: if isinstance(shard_id, int): return shard_id assert isinstance(shard_id, str) - qkv_idxs = { "q": 0, "k": 1, "v": 2 } + qkv_idxs = {"q": 0, "k": 1, "v": 2} assert shard_id in qkv_idxs return qkv_idxs[shard_id] @@ -119,12 +117,12 @@ def shard_id_as_int( # logical_widths: torch.Tensor # ) -> Tuple[torch.Tensor, torch.Tensor]: # shard_id = self.shard_id_as_int(shard_id) - # offset = sum(logical_widths[:shard_id]) + # offset = sum(logical_widths[:shard_id]) # size = logical_widths[shard_id] # # update loaded weight with copies for broadcast. 
# loaded_weight = loaded_weight.repeat(size) # return param[offset : offset + size], loaded_weight - + def scales_shard_indexer( self, param: torch.Tensor, @@ -146,11 +144,15 @@ def apply_weights(self, # empirically, these are all the same x_scale = layer.act_scale.max() qinput = per_tensor_quantize_static(x, x_scale) - + # FOR LOOP TO BE REPLACED BY CUTLASS KERNEL W/ EPILOGUE FUSION - output = torch.zeros(x.shape[0], layer.weight.shape[0], dtype=x.dtype, device="cuda") + output = torch.zeros(x.shape[0], + layer.weight.shape[0], + dtype=x.dtype, + device="cuda") start_offset = 0 - for _, (logical_width, w_scale) in enumerate(zip(layer.logical_widths, layer.weight_scale)): + for _, (logical_width, w_scale) in enumerate( + zip(layer.logical_widths, layer.weight_scale)): end_offset = start_offset + logical_width out, _ = torch._scaled_mm( @@ -167,7 +169,8 @@ def apply_weights(self, return output -def per_tensor_quantize_static(tensor: torch.Tensor, inv_scale: float) -> torch.Tensor: +def per_tensor_quantize_static(tensor: torch.Tensor, + inv_scale: float) -> torch.Tensor: """Quantize a tensor using per-tensor static scaling factor. Args: tensor: The input tensor. @@ -181,7 +184,8 @@ def per_tensor_quantize_static(tensor: torch.Tensor, inv_scale: float) -> torch. return qweight.to(torch.float8_e4m3fn) -def per_tensor_quantize_dyanmic(tensor: torch.Tensor) -> tuple[torch.Tensor, float]: +def per_tensor_quantize_dyanmic( + tensor: torch.Tensor) -> tuple[torch.Tensor, float]: """Quantize a tensor using per-tensor dynamic scaling factor. Args: tensor: The input tensor. From 72426000873ab6b07e7cf41b65d38ed915ecf379 Mon Sep 17 00:00:00 2001 From: mgoin Date: Thu, 25 Apr 2024 22:34:55 +0000 Subject: [PATCH 53/90] Fix tuple type --- vllm/model_executor/layers/quantization/fp8_serialized.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/model_executor/layers/quantization/fp8_serialized.py b/vllm/model_executor/layers/quantization/fp8_serialized.py index 1179dc07a48d6..1d0b44f37b155 100644 --- a/vllm/model_executor/layers/quantization/fp8_serialized.py +++ b/vllm/model_executor/layers/quantization/fp8_serialized.py @@ -185,7 +185,7 @@ def per_tensor_quantize_static(tensor: torch.Tensor, def per_tensor_quantize_dyanmic( - tensor: torch.Tensor) -> tuple[torch.Tensor, float]: + tensor: torch.Tensor) -> Tuple[torch.Tensor, float]: """Quantize a tensor using per-tensor dynamic scaling factor. Args: tensor: The input tensor. 
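
Taken together, patches 42 through 53 settle on one per-tensor FP8 recipe: weights are stored as float8_e4m3fn with a float32 scale per logical weight, activations are quantized per tensor with either a calibrated "static" scale shipped in the checkpoint or a "dynamic" scale computed from the runtime absmax, and the GEMM dequantizes on the way out. Below is a minimal sketch of that recipe, mirroring the helpers in fp8_serialized.py; the names are illustrative rather than vLLM's public API, an FP8-capable GPU is assumed, and the call uses the PyTorch 2.2-era torch._scaled_mm, which returns an (output, amax) pair.

import torch

FP8 = torch.float8_e4m3fn
FINFO = torch.finfo(FP8)

def quantize_dynamic(t: torch.Tensor):
    # Dynamic scheme: derive the per-tensor scale from the runtime absmax,
    # clamped so an all-zero tensor cannot produce a zero scale.
    scale = (t.abs().amax().clamp(min=1e-12) / FINFO.max).float()
    q = (t / scale).clamp(min=FINFO.min, max=FINFO.max).to(FP8)
    return q, scale

def quantize_static(t: torch.Tensor, scale: torch.Tensor) -> torch.Tensor:
    # Static scheme: the scale was calibrated offline and loaded from the
    # checkpoint (the act_scale parameter created in create_weights).
    return (t / scale).clamp(min=FINFO.min, max=FINFO.max).to(FP8)

def fp8_linear(x, qweight, w_scale, act_scale=None):
    # FP8 x FP8 GEMM; _scaled_mm folds both dequantization scales into the
    # epilogue, so the result comes back in the activation dtype.
    if act_scale is None:
        qx, x_scale = quantize_dynamic(x)
    else:
        qx, x_scale = quantize_static(x, act_scale), act_scale
    out, _ = torch._scaled_mm(qx, qweight.t(), out_dtype=x.dtype,
                              scale_a=x_scale, scale_b=w_scale)
    return out

The static path is what the act_scaling/activation_scheme switch selects: it skips the absmax reduction over the activation on every forward pass, at the cost of an offline calibration step.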
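The static checkpoints used by run_fp8.py (for example nm-testing/mistral-fp8-static) must already contain those act_scale values, and nothing in this series produces them. A rough sketch of such an offline calibration pass follows; it is a hypothetical helper rather than anything in this PR. It tracks the running absmax of each linear layer's input over a handful of batches and converts it to a per-tensor scale with the same absmax / fp8_max convention as the kernels above.

import torch

@torch.no_grad()
def calibrate_act_scales(model, calibration_batches, tracked_modules):
    finfo = torch.finfo(torch.float8_e4m3fn)
    amax = {mod: torch.zeros((), device="cuda") for mod in tracked_modules}

    def make_hook(mod):
        def hook(module, args):
            # args[0] is the activation flowing into the linear layer.
            amax[mod].copy_(torch.maximum(amax[mod], args[0].abs().amax()))
        return hook

    handles = [mod.register_forward_pre_hook(make_hook(mod))
               for mod in tracked_modules]
    for batch in calibration_batches:
        model(batch)
    for handle in handles:
        handle.remove()
    # scale = absmax / fp8_max, so dequantization multiplies by the scale.
    return {mod: (amax[mod] / finfo.max).float() for mod in amax}
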
From 21ddbb4049fef3f2cfaa0d3bfe89fe96b6e5ed92 Mon Sep 17 00:00:00 2001 From: Robert Shaw Date: Fri, 26 Apr 2024 11:09:08 +0000 Subject: [PATCH 54/90] stash tyler's state --- requirements-cuda.txt | 1 + run_fp8.py | 2 +- .../layers/fused_gemm_dq/__init__.py | 5 + vllm/model_executor/layers/linear.py | 8 +- .../layers/quantization/fp8_serialized.py | 112 ++++++++++-------- 5 files changed, 75 insertions(+), 53 deletions(-) create mode 100644 vllm/model_executor/layers/fused_gemm_dq/__init__.py diff --git a/requirements-cuda.txt b/requirements-cuda.txt index 1bddae4c6f40f..b873423911488 100644 --- a/requirements-cuda.txt +++ b/requirements-cuda.txt @@ -7,3 +7,4 @@ nvidia-ml-py # for pynvml package vllm-nccl-cu12>=2.18,<2.19 # for downloading nccl library torch == 2.2.1 xformers == 0.0.25 # Requires PyTorch 2.2.1 +nvidia-cutlass diff --git a/run_fp8.py b/run_fp8.py index 113dca8401494..5c4b4529cdbb1 100644 --- a/run_fp8.py +++ b/run_fp8.py @@ -11,7 +11,7 @@ args = parser.parse_args() if args.type == "static": - model_name = "nm-testing/mistral-fp8-static" + model_name = "nm-testing/Meta-Llama-3-8B-Instruct-FP8" elif args.type == "dynamic": model_name = "nm-testing/mistral-fp8-dynamic" else: diff --git a/vllm/model_executor/layers/fused_gemm_dq/__init__.py b/vllm/model_executor/layers/fused_gemm_dq/__init__.py new file mode 100644 index 0000000000000..2acb1c2aad6c7 --- /dev/null +++ b/vllm/model_executor/layers/fused_gemm_dq/__init__.py @@ -0,0 +1,5 @@ +from vllm.model_executor.layers.fused_gemm_dq.fused_gemm_dq_fp8 import fused_gemm_dq_fp8 + +__all__ = [ + "fused_gemm_dq_fp8", +] \ No newline at end of file diff --git a/vllm/model_executor/layers/linear.py b/vllm/model_executor/layers/linear.py index 0f590a98688fb..87b2bb7d9acce 100644 --- a/vllm/model_executor/layers/linear.py +++ b/vllm/model_executor/layers/linear.py @@ -300,6 +300,7 @@ def weight_loader(self, # TODO: sync with is_metadata. # For loading scales. 
shard_indexer = getattr(param, "shard_indexer", None) + logical_widths = getattr(param, "logical_widths", None) if output_dim is not None and shard_indexer is not None: raise NotImplementedError( "We do not currently support output_dim != None and " @@ -372,7 +373,8 @@ def weight_loader(self, elif shard_indexer is not None: param_data, loaded_weight = shard_indexer(param_data, loaded_weight, - loaded_shard_id) + loaded_shard_id, + logical_widths) else: ignore_warning = getattr(param, "ignore_warning", False) @@ -458,6 +460,7 @@ def weight_loader(self, # TODO: sync with is_metadata UX shard_indexer = getattr(param, "shard_indexer", None) + logical_widths = getattr(param, "logical_widths", None) if output_dim is not None and shard_indexer is not None: raise NotImplementedError( "We do not currently support output_dim != None and " @@ -544,7 +547,8 @@ def weight_loader(self, elif shard_indexer is not None: param_data, loaded_weight = shard_indexer(param_data, loaded_weight, - loaded_shard_id) + loaded_shard_id, + logical_widths) else: ignore_warning = getattr(param, "ignore_warning", False) if not ignore_warning: diff --git a/vllm/model_executor/layers/quantization/fp8_serialized.py b/vllm/model_executor/layers/quantization/fp8_serialized.py index 1d0b44f37b155..da062a8a1492b 100644 --- a/vllm/model_executor/layers/quantization/fp8_serialized.py +++ b/vllm/model_executor/layers/quantization/fp8_serialized.py @@ -3,6 +3,7 @@ import torch from torch.nn.parameter import Parameter +from vllm.model_executor.layers.fused_gemm_dq import fused_gemm_dq_fp8 from vllm.model_executor.layers.linear import (LinearMethodBase, set_weight_attrs) from vllm.model_executor.layers.quantization.base_config import ( @@ -68,7 +69,10 @@ def create_weights( **extra_weight_attrs, ): del input_size, output_size, params_dtype + num_logical_weights = len(output_partition_sizes) + layer.logical_widths = output_partition_sizes + # WEIGHT weight = Parameter(torch.empty(sum(output_partition_sizes), input_size_per_partition, dtype=torch.float8_e4m3fn), @@ -76,19 +80,29 @@ def create_weights( layer.register_parameter("weight", weight) set_weight_attrs(weight, {"input_dim": 1, "output_dim": 0}) set_weight_attrs(weight, extra_weight_attrs) + + # WEIGHT SCALE + if num_logical_weights == 1: + weight_scale = Parameter( + torch.empty(len(output_partition_sizes), dtype=torch.float32), + requires_grad=False) + + layer.register_parameter("weight_scale", weight_scale) + set_weight_attrs(weight_scale, extra_weight_attrs) + set_weight_attrs(weight_scale, { + "shard_indexer": self.scales_shard_indexer, + }) + else: + weight_scale = Parameter( + torch.empty(sum(output_partition_sizes), dtype=torch.float32), + requires_grad=False) + layer.register_parameter("weight_scale", weight_scale) + set_weight_attrs(weight_scale, extra_weight_attrs) + set_weight_attrs(weight_scale, { + "shard_indexer": self.scales_shard_indexer, + }) - weight_scale = Parameter(torch.empty( - len(output_partition_sizes), - device='cuda', - dtype=torch.float32, - ), - requires_grad=False) - layer.register_parameter("weight_scale", weight_scale) - set_weight_attrs(weight_scale, extra_weight_attrs) - set_weight_attrs(weight_scale, { - "shard_indexer": self.scales_shard_indexer, - }) - + # ACT Scale if self.quant_config.activation_scheme == "static": act_scale = Parameter(torch.empty(len(output_partition_sizes), dtype=torch.float32), @@ -99,8 +113,6 @@ def create_weights( "shard_indexer": self.scales_shard_indexer, }) - layer.logical_widths = output_partition_sizes - def 
shard_id_as_int(self, shard_id: Union[str, int]) -> int: if isinstance(shard_id, int): return shard_id @@ -109,26 +121,28 @@ def shard_id_as_int(self, shard_id: Union[str, int]) -> int: assert shard_id in qkv_idxs return qkv_idxs[shard_id] - # def scales_shard_splitter_NKK( - # self, - # param: torch.Tensor, - # loaded_weight: torch.Tensor, - # shard_id: Union[str, int], - # logical_widths: torch.Tensor - # ) -> Tuple[torch.Tensor, torch.Tensor]: - # shard_id = self.shard_id_as_int(shard_id) - # offset = sum(logical_widths[:shard_id]) - # size = logical_widths[shard_id] - # # update loaded weight with copies for broadcast. - # loaded_weight = loaded_weight.repeat(size) - # return param[offset : offset + size], loaded_weight + def scales_shard_indexer_NKK( + self, + param: torch.Tensor, + loaded_weight: torch.Tensor, + shard_id: Union[str, int], + logical_widths, + ) -> Tuple[torch.Tensor, torch.Tensor]: + shard_id = self.shard_id_as_int(shard_id) + offset = sum(logical_widths[:shard_id]) + size = logical_widths[shard_id] + # update loaded weight with copies for broadcast. + loaded_weight = loaded_weight.repeat(size) + return param[offset:offset + size], loaded_weight def scales_shard_indexer( self, param: torch.Tensor, loaded_weight: torch.Tensor, shard_id: Union[str, int], + logical_widths, ) -> Tuple[torch.Tensor, torch.Tensor]: + del logical_widths return param[self.shard_id_as_int(shard_id)], loaded_weight def apply_weights(self, @@ -136,7 +150,7 @@ def apply_weights(self, x: torch.Tensor, bias: Optional[torch.Tensor] = None) -> torch.Tensor: - w_scale = layer.weight_scale.max() + m = x.shape[0] if self.quant_config.activation_scheme == "dynamic": qinput, x_scale = per_tensor_quantize_dyanmic(x) @@ -145,28 +159,26 @@ def apply_weights(self, x_scale = layer.act_scale.max() qinput = per_tensor_quantize_static(x, x_scale) - # FOR LOOP TO BE REPLACED BY CUTLASS KERNEL W/ EPILOGUE FUSION - output = torch.zeros(x.shape[0], - layer.weight.shape[0], - dtype=x.dtype, - device="cuda") - start_offset = 0 - for _, (logical_width, w_scale) in enumerate( - zip(layer.logical_widths, layer.weight_scale)): - end_offset = start_offset + logical_width - - out, _ = torch._scaled_mm( - qinput, - layer.weight[start_offset:end_offset, :].t(), - out_dtype=x.dtype, - scale_a=x_scale, - scale_b=w_scale, - bias=bias, - ) - output[:, start_offset:end_offset] = out - start_offset = end_offset - - return output + # HACK: CUTLASS hits an illegal instruction for fp8 when m == 1, + # so pad m to 2 + if m == 1: + tmp = torch.zeros(2, + qinput.shape[1], + dtype=qinput.dtype, + device="cuda") + tmp[0, :] = qinput + qinput = tmp + + out_cutlass_qkv = fused_gemm_dq_fp8( + qinput, + layer.weight.t(), + out_dtype=x.dtype, + scale_a=x_scale, + scale_b=layer.weight_scale, + bias=bias, + ) + + return out_cutlass_qkv[:m, :] def per_tensor_quantize_static(tensor: torch.Tensor, From d27015cc9bdd3686b632e7e74629b918b5db8b1a Mon Sep 17 00:00:00 2001 From: Robert Shaw Date: Fri, 26 Apr 2024 11:20:37 +0000 Subject: [PATCH 55/90] stash --- run_fp8.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/run_fp8.py b/run_fp8.py index 5c4b4529cdbb1..702f8e6df9eea 100644 --- a/run_fp8.py +++ b/run_fp8.py @@ -11,13 +11,14 @@ args = parser.parse_args() if args.type == "static": - model_name = "nm-testing/Meta-Llama-3-8B-Instruct-FP8" + model_name = "nm-testing/mistral-fp8-static" elif args.type == "dynamic": model_name = "nm-testing/mistral-fp8-dynamic" else: raise ValueError("--type should be `static` or `dynamic`") - 
tokenizer_name = "mistralai/Mistral-7B-Instruct-v0.2" + # tokenizer_name = "mistralai/Mistral-7B-Instruct-v0.2" + tokenizer_name = model_name model = LLM(model_name, tokenizer=tokenizer_name, @@ -29,9 +30,7 @@ prompt = tokenizer.apply_chat_template([{ "role": "user", "content": "What is your name" - }], - tokenize=False, - add_generation_prompt=True) + }], tokenize=False, add_generation_prompt=True) print(f"----- Prompt: {prompt}") outputs = model.generate(prompt) From 1111f8789b8008a5263615ee22be66e777312b39 Mon Sep 17 00:00:00 2001 From: Robert Shaw Date: Fri, 26 Apr 2024 11:37:45 +0000 Subject: [PATCH 56/90] cutlass working, but slow jitting on hotpath --- run_fp8.py | 6 +----- vllm/model_executor/layers/quantization/fp8_serialized.py | 7 +++++-- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/run_fp8.py b/run_fp8.py index 702f8e6df9eea..2c1b071cc5e1b 100644 --- a/run_fp8.py +++ b/run_fp8.py @@ -17,15 +17,11 @@ else: raise ValueError("--type should be `static` or `dynamic`") - # tokenizer_name = "mistralai/Mistral-7B-Instruct-v0.2" - tokenizer_name = model_name - model = LLM(model_name, - tokenizer=tokenizer_name, enforce_eager=True, max_model_len=1024) - tokenizer = AutoTokenizer.from_pretrained(tokenizer_name) + tokenizer = AutoTokenizer.from_pretrained(model_name) prompt = tokenizer.apply_chat_template([{ "role": "user", diff --git a/vllm/model_executor/layers/quantization/fp8_serialized.py b/vllm/model_executor/layers/quantization/fp8_serialized.py index da062a8a1492b..a3511bdb02fa3 100644 --- a/vllm/model_executor/layers/quantization/fp8_serialized.py +++ b/vllm/model_executor/layers/quantization/fp8_serialized.py @@ -97,9 +97,11 @@ def create_weights( torch.empty(sum(output_partition_sizes), dtype=torch.float32), requires_grad=False) layer.register_parameter("weight_scale", weight_scale) - set_weight_attrs(weight_scale, extra_weight_attrs) + # set_weight_attrs(weight_scale, extra_weight_attrs) set_weight_attrs(weight_scale, { - "shard_indexer": self.scales_shard_indexer, + **extra_weight_attrs, + "shard_indexer": self.scales_shard_indexer_NKK, + "logical_widths": output_partition_sizes }) # ACT Scale @@ -113,6 +115,7 @@ def create_weights( "shard_indexer": self.scales_shard_indexer, }) + def shard_id_as_int(self, shard_id: Union[str, int]) -> int: if isinstance(shard_id, int): return shard_id From f5d32aee06d270215ae4629ff032a45c8bc83c7f Mon Sep 17 00:00:00 2001 From: Robert Shaw Date: Fri, 26 Apr 2024 12:29:25 +0000 Subject: [PATCH 57/90] first end to end run with mixtral --- run_fp8.py | 15 +++++-- vllm/model_executor/models/mixtral.py | 65 ++++++++++++++++++--------- 2 files changed, 54 insertions(+), 26 deletions(-) diff --git a/run_fp8.py b/run_fp8.py index 2c1b071cc5e1b..0caf538de1fef 100644 --- a/run_fp8.py +++ b/run_fp8.py @@ -4,18 +4,24 @@ from vllm import LLM +choices = ["llama-static", "mistral-static", "mistral-dynamic", "mixtral-static"] + parser = argparse.ArgumentParser() -parser.add_argument("--type", choices=["static", "dynamic"]) +parser.add_argument("--type", choices="mixtral-static") if __name__ == "__main__": args = parser.parse_args() - if args.type == "static": + if args.type == "llama-static": + model_name = "nm-testing/Meta-Llama-3-8B-Instruct-FP8" + elif args.type == "mistral-static": model_name = "nm-testing/mistral-fp8-static" - elif args.type == "dynamic": + elif args.type == "mistral-dynamic": model_name = "nm-testing/mistral-fp8-dynamic" + elif args.type == 'mixtral-static': + model_name = "nm-testing/Mixtral-8x7B-Instruct-v0.1-FP8" 
else: - raise ValueError("--type should be `static` or `dynamic`") + raise ValueError(f"--type should be in {choices}") model = LLM(model_name, enforce_eager=True, @@ -30,5 +36,6 @@ print(f"----- Prompt: {prompt}") outputs = model.generate(prompt) + print(outputs) generation = outputs[0].outputs[0].text print(f"----- Generation: {generation}") diff --git a/vllm/model_executor/models/mixtral.py b/vllm/model_executor/models/mixtral.py index dad1c43e1b0da..2f7aca2c3b0e9 100644 --- a/vllm/model_executor/models/mixtral.py +++ b/vllm/model_executor/models/mixtral.py @@ -39,6 +39,7 @@ ReplicatedLinear, RowParallelLinear) from vllm.model_executor.layers.logits_processor import LogitsProcessor +from vllm.model_executor.layers.quantization.fp8_serialized import FP8LinearMethod from vllm.model_executor.layers.quantization.fp8 import (Fp8LinearMethod, per_tensor_quantize) from vllm.model_executor.layers.rotary_embedding import get_rope @@ -79,7 +80,8 @@ def __init__( self.intermediate_size = intermediate_size // self.tp_size # FIXME(pcmoritz): Make this more general to support different # quantization schemes - self.use_fp8 = isinstance(linear_method, Fp8LinearMethod) + self.use_fp8 = (isinstance(linear_method, Fp8LinearMethod) or + isinstance(linear_method, FP8LinearMethod)) if params_dtype is None: params_dtype = torch.get_default_dtype() @@ -90,19 +92,22 @@ def __init__( bias=False, params_dtype=self.params_dtype, linear_method=None) + + if self.use_fp8: + params_dtype = torch.float8_e4m3fn self.ws = nn.Parameter( torch.empty(self.num_total_experts, 2 * self.intermediate_size, self.hidden_size, device="cuda", - dtype=self.params_dtype)) + dtype=params_dtype)) self.w2s = nn.Parameter( torch.empty(self.num_total_experts, self.hidden_size, self.intermediate_size, device="cuda", - dtype=self.params_dtype)) + dtype=params_dtype)) set_weight_attrs(self.ws, { "weight_loader": self.weight_loader, @@ -120,16 +125,23 @@ def __init__( torch.ones( self.num_total_experts, device="cuda", dtype=torch.float32), requires_grad=False) if self.use_fp8 else None + + set_weight_attrs(self.ws_scale, { + "weight_loader": self.weight_loader, + }) + set_weight_attrs(self.w2s_scale, { + "weight_loader": self.weight_loader, + }) # Scaling factors for FP8 activations need_act_scales = (self.use_fp8 and linear_method.quant_config.activation_scheme == "static") self.as_scale = nn.Parameter( - torch.zeros(1, device="cuda", dtype=torch.float32), + torch.zeros(self.num_total_experts, device="cuda", dtype=torch.float32), requires_grad=False) if need_act_scales else None self.a2s_scale = nn.Parameter( - torch.zeros(1, device="cuda", dtype=torch.float32), + torch.zeros(self.num_total_experts, device="cuda", dtype=torch.float32), requires_grad=False) if need_act_scales else None if need_act_scales: @@ -152,27 +164,29 @@ def weight_loader(self, param: nn.Parameter, loaded_weight: torch.Tensor, param_data[expert_id, shard_size:2 * shard_size, :] = loaded_weight[shard, :] if weight_name.endswith("w2.weight"): - param_data[expert_id, :, :] = loaded_weight[:, shard] - if "act_scale" in weight_name: - param_data[:] = param_data[:].max(loaded_weight) - - def process_weights_after_loading(self): - if self.use_fp8: - ws = torch.empty_like(self.ws.data, dtype=torch.float8_e4m3fn) - w2s = torch.empty_like(self.w2s.data, dtype=torch.float8_e4m3fn) - for expert in range(self.num_total_experts): - ws[expert, :, :], self.ws_scale[expert] = per_tensor_quantize( - self.ws.data[expert, :, :]) - w2s[expert, :, :], self.w2s_scale[ - expert] = 
per_tensor_quantize(self.w2s.data[expert, :, :]) - self.ws = nn.Parameter(ws, requires_grad=False) - self.w2s = nn.Parameter(w2s, requires_grad=False) + param_data[expert_id] = loaded_weight[:, shard] + if "act_scale" in weight_name or "weight_scale" in weight_name: + param_data[expert_id] = loaded_weight + + # def process_weights_after_loading(self): + # if self.use_fp8: + # ws = torch.empty_like(self.ws.data, dtype=torch.float8_e4m3fn) + # w2s = torch.empty_like(self.w2s.data, dtype=torch.float8_e4m3fn) + # for expert in range(self.num_total_experts): + # ws[expert, :, :], self.ws_scale[expert] = per_tensor_quantize( + # self.ws.data[expert, :, :]) + # w2s[expert, :, :], self.w2s_scale[ + # expert] = per_tensor_quantize(self.w2s.data[expert, :, :]) + # self.ws = nn.Parameter(ws, requires_grad=False) + # self.w2s = nn.Parameter(w2s, requires_grad=False) def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: num_tokens, hidden_size = hidden_states.shape hidden_states = hidden_states.view(-1, self.hidden_size) # router_logits: (num_tokens, n_experts) router_logits, _ = self.gate(hidden_states) + + # TODO: fused MoE kernel might want to take different scales for each expert? final_hidden_states = fused_moe(hidden_states, self.ws, self.w2s, @@ -183,8 +197,8 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: use_fp8=self.use_fp8, w1_scale=self.ws_scale, w2_scale=self.w2s_scale, - a1_scale=self.as_scale, - a2_scale=self.a2s_scale) + a1_scale=self.as_scale.max(), + a2_scale=self.a2s_scale.max()) if self.tp_size > 1: final_hidden_states = tensor_model_parallel_all_reduce( @@ -466,6 +480,13 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): ] expert_params_mapping = [ + # These are the activation scales for the experts + # (param_name, weight_name, expert_id) + ("ws_scale" if weight_name in ["w1", "w3"] else "w2s_scale", + f"experts.{expert_id}.{weight_name}.weight_scale", expert_id) + for expert_id in range(self.config.num_local_experts) + for weight_name in ["w1", "w2", "w3"] + ] + [ # These are the weights for the experts # (param_name, weight_name, expert_id) ("ws" if weight_name in ["w1", "w3"] else "w2s", From 924e8ce4a61a7bc0400325f8cf027a116380a11e Mon Sep 17 00:00:00 2001 From: Robert Shaw Date: Fri, 26 Apr 2024 13:41:49 +0000 Subject: [PATCH 58/90] added missed file --- .../layers/fused_gemm_dq/fused_gemm_dq_fp8.py | 87 +++++++++++++++++++ 1 file changed, 87 insertions(+) create mode 100644 vllm/model_executor/layers/fused_gemm_dq/fused_gemm_dq_fp8.py diff --git a/vllm/model_executor/layers/fused_gemm_dq/fused_gemm_dq_fp8.py b/vllm/model_executor/layers/fused_gemm_dq/fused_gemm_dq_fp8.py new file mode 100644 index 0000000000000..66e18e0c4b7b2 --- /dev/null +++ b/vllm/model_executor/layers/fused_gemm_dq/fused_gemm_dq_fp8.py @@ -0,0 +1,87 @@ +import cutlass +from cutlass import Tensor as FakeTensor +import cutlass.epilogue + +import torch +from typing import Optional, Tuple, Dict + + +def setup_dequant_epilogue( + plan: cutlass.op.Gemm, + dq: torch.Tensor, + scale_a: Optional[torch.Tensor], + scale_b: Optional[torch.Tensor], + bias: Optional[torch.Tensor], +) -> Tuple[cutlass.op.Gemm, Dict]: + assert bias is None + + if all([scale_a is None, scale_b is None]): + return plan, None + assert scale_b is not None + + def epilog_with_scale_b(accum, scale_b): + D = scale_b * accum + return D + + def epilog_with_both_scales(accum, scale_a, scale_b): + D = scale_a * (scale_b * accum) + return D + + visitor_args = {"scale_a": scale_a, "scale_b": scale_b, 
"D": dq} + epilogue_tensors = { + "accum": FakeTensor( + element=torch.float32, + shape=dq.shape, + layout_tag=cutlass.LayoutType.RowMajor, + ), + "D": dq, + "scale_b": scale_b, + } + epilog_fn = epilog_with_scale_b + + if scale_a is not None: + epilogue_tensors["scale_a"] = scale_a + visitor_args["scale_a"] = scale_a + epilog_fn = epilog_with_both_scales + + plan.epilogue_visitor = cutlass.epilogue.trace(epilog_fn, epilogue_tensors) + return plan, visitor_args + + +def fused_gemm_dq_fp8( + x_q: torch.Tensor, + w_q: torch.Tensor, + out_dtype: torch.dtype, + scale_a: Optional[torch.Tensor] = None, + scale_b: Optional[torch.Tensor] = None, + bias: Optional[torch.Tensor] = None, +) -> torch.Tensor: + dq = torch.empty((x_q.shape[0], w_q.shape[1]), dtype=out_dtype, device="cuda") + C = torch.zeros((x_q.shape[0], w_q.shape[1]), dtype=out_dtype, device="cuda") + + plan = cutlass.op.Gemm( + element_A=x_q.dtype, + element_B=w_q.dtype, + element_C=dq.dtype, + element_D=dq.dtype, + layout_A=cutlass.LayoutType.RowMajor, + layout_B=cutlass.LayoutType.ColumnMajor, + layout_C=cutlass.LayoutType.RowMajor, + element_accumulator=torch.float32, + kernel_cc=90, + ) + + plan, visitor_args = setup_dequant_epilogue(plan, dq, scale_a, scale_b, bias) + + plan.run( + x_q, + w_q, + C, + dq, + alpha=1, + beta=0, + visitor_args=visitor_args, + print_module=False, + ) + + return dq From 823a2e79ca7e767c9c4fb88b2f506154d6786d19 Mon Sep 17 00:00:00 2001 From: Michael Goin Date: Fri, 26 Apr 2024 08:55:39 -0600 Subject: [PATCH 59/90] Update run_fp8.py Co-authored-by: Tyler Michael Smith --- run_fp8.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/run_fp8.py b/run_fp8.py index 0caf538de1fef..0ee0a69af91ad 100644 --- a/run_fp8.py +++ b/run_fp8.py @@ -7,7 +7,7 @@ choices = ["llama-static", "mistral-static", "mistral-dynamic", "mixtral-static"] parser = argparse.ArgumentParser() -parser.add_argument("--type", choices="mixtral-static") +parser.add_argument("--type", choices=choices) if __name__ == "__main__": args = parser.parse_args() From 81f42befabf210054ec1a717a8772b08410ac65e Mon Sep 17 00:00:00 2001 From: Robert Shaw <114415538+robertgshaw2-neuralmagic@users.noreply.github.com> Date: Sat, 27 Apr 2024 08:00:29 -0400 Subject: [PATCH 60/90] Dynamic FP8 works, but static does not (#213) merging michael's work --- requirements-cuda.txt | 1 - .../layers/fused_gemm_dq/__init__.py | 5 - .../layers/fused_gemm_dq/fused_gemm_dq_fp8.py | 87 ------- vllm/model_executor/layers/linear.py | 5 + .../layers/quantization/__init__.py | 3 +- .../layers/quantization/base_config.py | 6 - .../model_executor/layers/quantization/fp8.py | 195 ++++++++++++--- .../layers/quantization/fp8_serialized.py | 223 ------------------ .../model_loader/weight_utils.py | 12 +- vllm/model_executor/models/mixtral.py | 1 - 10 files changed, 174 insertions(+), 364 deletions(-) delete mode 100644 vllm/model_executor/layers/fused_gemm_dq/__init__.py delete mode 100644 vllm/model_executor/layers/fused_gemm_dq/fused_gemm_dq_fp8.py delete mode 100644 vllm/model_executor/layers/quantization/fp8_serialized.py diff --git a/requirements-cuda.txt b/requirements-cuda.txt index b873423911488..1bddae4c6f40f 100644 --- a/requirements-cuda.txt +++ b/requirements-cuda.txt @@ -7,4 +7,3 @@ nvidia-ml-py # for pynvml package vllm-nccl-cu12>=2.18,<2.19 # for downloading nccl library torch == 2.2.1 xformers == 0.0.25 # Requires PyTorch 2.2.1 -nvidia-cutlass diff --git a/vllm/model_executor/layers/fused_gemm_dq/__init__.py 
b/vllm/model_executor/layers/fused_gemm_dq/__init__.py deleted file mode 100644 index 2acb1c2aad6c7..0000000000000 --- a/vllm/model_executor/layers/fused_gemm_dq/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -from vllm.model_executor.layers.fused_gemm_dq.fused_gemm_dq_fp8 import fused_gemm_dq_fp8 - -__all__ = [ - "fused_gemm_dq_fp8", -] \ No newline at end of file diff --git a/vllm/model_executor/layers/fused_gemm_dq/fused_gemm_dq_fp8.py b/vllm/model_executor/layers/fused_gemm_dq/fused_gemm_dq_fp8.py deleted file mode 100644 index 66e18e0c4b7b2..0000000000000 --- a/vllm/model_executor/layers/fused_gemm_dq/fused_gemm_dq_fp8.py +++ /dev/null @@ -1,87 +0,0 @@ -import cutlass -from cutlass import Tensor as FakeTensor -import cutlass.epilogue - -import torch -from typing import Optional, Tuple, Dict - - -def setup_dequant_epilogue( - plan: cutlass.op.Gemm, - dq: torch.Tensor, - scale_a: Optional[torch.Tensor], - scale_b: Optional[torch.Tensor], - bias: Optional[torch.Tensor], -) -> Tuple[cutlass.op.Gemm, Dict]: - assert bias is None - - if all([scale_a is None, scale_b is None]): - return plan, None - assert scale_b is not None - - def epilog_with_scale_b(accum, scale_b): - D = scale_b * accum - return D - - def epilog_with_both_scales(accum, scale_a, scale_b): - D = scale_a * (scale_b * accum) - return D - - visitor_args = {"scale_a": scale_a, "scale_b": scale_b, "D": dq} - epilogue_tensors = { - "accum": FakeTensor( - element=torch.float32, - shape=dq.shape, - layout_tag=cutlass.LayoutType.RowMajor, - ), - "D": dq, - "scale_b": scale_b, - } - epilog_fn = epilog_with_scale_b - - if scale_a is not None: - epilogue_tensors["scale_a"] = scale_a - visitor_args["scale_a"] = scale_a - epilog_fn = epilog_with_both_scales - - plan.epilogue_visitor = cutlass.epilogue.trace(epilog_fn, epilogue_tensors) - return plan, visitor_args - - -def fused_gemm_dq_fp8( - x_q: torch.Tensor, - w_q: torch.Tensor, - out_dtype: torch.dtype, - scale_a: Optional[torch.Tensor] = None, - scale_b: Optional[torch.Tensor] = None, - bias: Optional[torch.Tensor] = None, -) -> torch.Tensor: - dq = torch.empty((x_q.shape[0], w_q.shape[1]), dtype=out_dtype, device="cuda") - C = torch.zeros((x_q.shape[0], w_q.shape[1]), dtype=out_dtype, device="cuda") - - plan = cutlass.op.Gemm( - element_A=x_q.dtype, - element_B=w_q.dtype, - element_C=dq.dtype, - element_D=dq.dtype, - layout_A=cutlass.LayoutType.RowMajor, - layout_B=cutlass.LayoutType.ColumnMajor, - layout_C=cutlass.LayoutType.RowMajor, - element_accumulator=torch.float32, - kernel_cc=90, - ) - - plan, visitor_args = setup_dequant_epilogue(plan, dq, scale_a, scale_b, bias) - - plan.run( - x_q, - w_q, - C, - dq, - alpha=1, - beta=0, - visitor_args=visitor_args, - print_module=False, - ) - - return dq diff --git a/vllm/model_executor/layers/linear.py b/vllm/model_executor/layers/linear.py index 87b2bb7d9acce..cfe6eefe35455 100644 --- a/vllm/model_executor/layers/linear.py +++ b/vllm/model_executor/layers/linear.py @@ -232,6 +232,11 @@ def weight_loader(self, param: Parameter, loaded_weight: torch.Tensor): start_idx = tp_rank * shard_size loaded_weight = loaded_weight.narrow(output_dim, start_idx, shard_size) + # TODO: canon + # This is for loading scales for fp8, which have no dims. 
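The weight_loader change here guards against 0-dim tensors: serialized FP8 checkpoints store per-tensor scales as scalars, which would otherwise fail the shape assertion. A minimal standalone sketch of the case handled by the reshape that follows (hypothetical values, plain PyTorch):

    import torch

    param_data = torch.empty(1, dtype=torch.float32)  # scale slot registered on the layer
    loaded_weight = torch.tensor(0.02)                # 0-dim per-tensor scale from the checkpoint
    if len(loaded_weight.shape) == 0:
        loaded_weight = loaded_weight.reshape(1)      # give it a dim so the shapes match
    assert param_data.shape == loaded_weight.shape
    param_data.copy_(loaded_weight)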
+ if len(loaded_weight.shape) == 0: + loaded_weight = loaded_weight.reshape(1) + assert param_data.shape == loaded_weight.shape param_data.copy_(loaded_weight) diff --git a/vllm/model_executor/layers/quantization/__init__.py b/vllm/model_executor/layers/quantization/__init__.py index 060eaad31899b..a525add458499 100644 --- a/vllm/model_executor/layers/quantization/__init__.py +++ b/vllm/model_executor/layers/quantization/__init__.py @@ -4,8 +4,7 @@ from vllm.model_executor.layers.quantization.awq import AWQConfig from vllm.model_executor.layers.quantization.base_config import ( QuantizationConfig) -# from vllm.model_executor.layers.quantization.fp8 import FP8Config -from vllm.model_executor.layers.quantization.fp8_serialized import FP8Config +from vllm.model_executor.layers.quantization.fp8 import FP8Config from vllm.model_executor.layers.quantization.gptq import GPTQConfig from vllm.model_executor.layers.quantization.marlin import MarlinConfig from vllm.model_executor.layers.quantization.squeezellm import SqueezeLLMConfig diff --git a/vllm/model_executor/layers/quantization/base_config.py b/vllm/model_executor/layers/quantization/base_config.py index 08061f83b5a38..6115e7c3be956 100644 --- a/vllm/model_executor/layers/quantization/base_config.py +++ b/vllm/model_executor/layers/quantization/base_config.py @@ -29,12 +29,6 @@ def get_min_capability(self) -> int: """ raise NotImplementedError - # The following is not an abstract method and returns True by default. - @classmethod - def require_config_file(cls) -> bool: - """Whether this quantization config needs a configuration filen.""" - return True - @staticmethod @abstractmethod def get_config_filenames() -> List[str]: diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py index 271cddc1175a5..29637704d5e4d 100644 --- a/vllm/model_executor/layers/quantization/fp8.py +++ b/vllm/model_executor/layers/quantization/fp8.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, Union import torch from torch.nn import Module @@ -9,14 +9,19 @@ from vllm.model_executor.layers.quantization.base_config import ( QuantizationConfig) +ACTIVATION_SCHEMES = ["static", "dynamic"] + class FP8Config(QuantizationConfig): """Config class for FP8.""" def __init__( self, + is_serialized: bool = False, activation_scheme: str = "dynamic", ) -> None: + self.is_serialized = is_serialized + assert activation_scheme in ACTIVATION_SCHEMES self.activation_scheme = activation_scheme @classmethod @@ -32,20 +37,19 @@ def get_min_capability(cls) -> int: # TODO: PyTorch 2.3.0+ is required to run FP8 on # SM 89 (e.g. Ada) GPUs. 
Specifically, this PR has to
         # be included: https://github.com/pytorch/pytorch/pull/118881
-        return 90
-
-    @classmethod
-    def require_config_file(cls) -> bool:
-        return False
+        return 89
 
     @classmethod
     def get_config_filenames(cls) -> List[str]:
-        return ["quantize_config.json"]
+        return []
 
     @classmethod
     def from_config(cls, config: Dict[str, Any]) -> "FP8Config":
+        quant_method = cls.get_from_keys(config, ["quant_method"])
+        is_serialized = ("fp8" in quant_method)
         activation_scheme = cls.get_from_keys(config, ["activation_scheme"])
-        return cls(activation_scheme)
+        return cls(is_serialized=is_serialized,
+                   activation_scheme=activation_scheme)
 
     def get_linear_method(self) -> "Fp8LinearMethod":
         return Fp8LinearMethod(self)
@@ -56,8 +60,12 @@ def get_scaled_act_names(self) -> List[str]:
 
 class Fp8LinearMethod(LinearMethodBase):
     """Linear method for FP8.
 
-    We now support common FP16/BF16 model checkpoints ONLY. The weight
-    scaling factor will be initialized after the model weights are loaded.
+    Supports loading FP8 checkpoints with static weight scale and
+    dynamic/static activation scale.
+
+    Also supports loading quantized FP16/BF16 model checkpoints with dynamic
+    activation scaling. The weight scaling factor will be initialized after
+    the model weights are loaded.
 
     Limitations:
     1. Only support per-tensor quantization due to torch._scaled_mm support.
@@ -81,54 +89,175 @@ def create_weights(
         params_dtype: torch.dtype,
         **extra_weight_attrs,
     ):
+        del input_size, output_size
+
+        layer.logical_widths = output_partition_sizes
         output_size_per_partition = sum(output_partition_sizes)
+
+        # WEIGHT
+        weight_dtype = torch.float8_e4m3fn if self.quant_config.is_serialized else params_dtype
         weight = Parameter(torch.empty(output_size_per_partition,
                                        input_size_per_partition,
-                                       dtype=params_dtype),
+                                       dtype=weight_dtype),
                            requires_grad=False)
         layer.register_parameter("weight", weight)
-        set_weight_attrs(weight, {"input_dim": 1, "output_dim": 0})
-        set_weight_attrs(weight, extra_weight_attrs)
+        set_weight_attrs(weight, {
+            **extra_weight_attrs,
+            "input_dim": 1,
+            "output_dim": 0,
+        })
 
-        w_scale = Parameter(
-            torch.empty(1, dtype=torch.float32),
-            requires_grad=False,
-        )
-        layer.register_parameter("weight_scaling_factor", w_scale)
+        # WEIGHT SCALE
+        weight_scale = Parameter(torch.empty(
+            len(output_partition_sizes),
+            dtype=torch.float32,
+        ),
+                                 requires_grad=False)
+        layer.register_parameter("weight_scale", weight_scale)
+        set_weight_attrs(
+            weight_scale, {
+                **extra_weight_attrs,
+                "shard_indexer": self.scales_shard_indexer,
+            })
+
+        # ACTIVATION SCALE
+        if self.quant_config.activation_scheme == "static":
+            act_scale = Parameter(torch.empty(len(output_partition_sizes),
                                              dtype=torch.float32),
+                                  requires_grad=False)
+            layer.register_parameter("act_scale", act_scale)
+            set_weight_attrs(act_scale, {
+                **extra_weight_attrs,
+                "shard_indexer":
+                self.scales_shard_indexer,
+            })
+
+    def shard_id_as_int(self, shard_id: Union[str, int]) -> int:
+        if isinstance(shard_id, int):
+            return shard_id
+        assert isinstance(shard_id, str)
+        qkv_idxs = {"q": 0, "k": 1, "v": 2}
+        assert shard_id in qkv_idxs
+        return qkv_idxs[shard_id]
+
+    def scales_shard_indexer(
+        self,
+        param: torch.Tensor,
+        loaded_weight: torch.Tensor,
+        shard_id: Union[str, int],
+        logical_widths: List[int],
+    ) -> Tuple[torch.Tensor, torch.Tensor]:
+        del logical_widths
+        return param[self.shard_id_as_int(shard_id)], loaded_weight
 
     def process_weights_after_loading(self, layer: Module) -> None:
         # Although the linear_method is propagated to all layers,
         #
only linear layers invoke "create_weights". So we check - # whether "weight_scaling_facor" is registered to determine + # whether "weight_scale" is registered to determine # whether the layer is a linear layer that requires quantization. - if not hasattr(layer, "weight_scaling_factor"): + if not hasattr(layer, "weight_scale"): return - qweight, weight_scale = per_tensor_quantize(layer.weight) + # If we loaded in an FP8 checkpoint, we can skip weight quantization + if self.quant_config.is_serialized: + # torch._scaled_mm requires column-major in the second + # input (weight), so we transpose the quantized weight. + # TODO + return + + qweight, weight_scale = per_tensor_quantize_dynamic(layer.weight) # torch._scaled_mm requires column-major in the second # input (weight), so we transpose the quantized weight. - layer.weight = Parameter(qweight.t(), requires_grad=False) - layer.weight_scaling_factor.data.copy_(weight_scale) + # TODO + # layer.weight = Parameter(qweight.t(), requires_grad=False) + layer.weight = Parameter(qweight, requires_grad=False) + weight_scales = torch.tensor( + [weight_scale for _ in layer.logical_widths], dtype=torch.float32) + layer.weight_scale.data.copy_(weight_scales) def apply_weights(self, layer: torch.nn.Module, x: torch.Tensor, bias: Optional[torch.Tensor] = None) -> torch.Tensor: - qinput, x_scale = per_tensor_quantize(x) - output, _ = torch._scaled_mm( - qinput, - layer.weight, - out_dtype=x.dtype, - scale_a=x_scale, - scale_b=layer.weight_scaling_factor, - bias=bias, - ) + + if self.quant_config.activation_scheme == "static": + # Empirically, these are all the same + x_scale = layer.act_scale.max() + qinput = per_tensor_quantize_static(x, x_scale) + else: + qinput, x_scale = per_tensor_quantize_dynamic(x) + + # # TODO: Inefficient loop over each shard since there is a per-tensor + # # scale for each shard. + # # To be replaced by cutlass gemm with epilogue fusion for performance. + # output = torch.zeros(x.shape[0], + # layer.weight.shape[0], + # dtype=x.dtype, + # device="cuda") + # start_offset = 0 + # for _, (logical_width, w_scale) in enumerate( + # zip(layer.logical_widths, layer.weight_scale)): + # end_offset = start_offset + logical_width + + # cuda_compute_capability = torch.cuda.get_device_capability() + # if cuda_compute_capability >= (9, 0): + # out, _ = torch._scaled_mm( + # qinput, + # layer.weight[start_offset:end_offset, :].t(), + # out_dtype=x.dtype, + # scale_a=x_scale, + # scale_b=w_scale, + # ) + # else: + # out = torch.nn.functional.linear( + # qinput.to(x.dtype) * x_scale.to(x.dtype), + # layer.weight[start_offset:end_offset, :].to(x.dtype) * w_scale.to(x.dtype), + # ) + + # output[:, start_offset:end_offset] = out + # start_offset = end_offset + + w_scale = layer.weight_scale.max() + + cuda_compute_capability = torch.cuda.get_device_capability() + if cuda_compute_capability >= (9, 0): + output, _ = torch._scaled_mm( + qinput, + layer.weight.t(), + out_dtype=x.dtype, + scale_a=x_scale, + scale_b=w_scale, + ) + else: + output = torch.nn.functional.linear( + qinput.to(x.dtype) * x_scale.to(x.dtype), + layer.weight.to(x.dtype) * w_scale.to(x.dtype), + ) + + if bias is not None: + output = output + bias + return output -def per_tensor_quantize(tensor: torch.Tensor) -> Tuple[torch.Tensor, float]: +def per_tensor_quantize_static(tensor: torch.Tensor, + inv_scale: float) -> torch.Tensor: """Quantize a tensor using per-tensor static scaling factor. + Args: + tensor: The input tensor. + inv_scale: The scale. 
+ """ + # Scale and clamp the tensor to bring it to + # the representative range of float8 data type + # (as default cast is unsaturated) + finfo = torch.finfo(torch.float8_e4m3fn) + qweight = (tensor / inv_scale).clamp(min=finfo.min, max=finfo.max) + return qweight.to(torch.float8_e4m3fn) + +def per_tensor_quantize_dynamic( + tensor: torch.Tensor) -> Tuple[torch.Tensor, float]: + """Quantize a tensor using per-tensor dynamic scaling factor. Args: tensor: The input tensor. """ diff --git a/vllm/model_executor/layers/quantization/fp8_serialized.py b/vllm/model_executor/layers/quantization/fp8_serialized.py deleted file mode 100644 index a3511bdb02fa3..0000000000000 --- a/vllm/model_executor/layers/quantization/fp8_serialized.py +++ /dev/null @@ -1,223 +0,0 @@ -from typing import Any, Dict, List, Optional, Tuple, Union - -import torch -from torch.nn.parameter import Parameter - -from vllm.model_executor.layers.fused_gemm_dq import fused_gemm_dq_fp8 -from vllm.model_executor.layers.linear import (LinearMethodBase, - set_weight_attrs) -from vllm.model_executor.layers.quantization.base_config import ( - QuantizationConfig) - - -class FP8Config(QuantizationConfig): - """Config class for FP8.""" - - def __init__( - self, - activation_scheme: str, - ) -> None: - assert activation_scheme == "static" or activation_scheme == "dynamic" - self.activation_scheme = activation_scheme - - @classmethod - def get_name(cls) -> str: - return "fp8" - - @classmethod - def get_supported_act_dtypes(cls) -> List[torch.dtype]: - return [torch.bfloat16, torch.half] - - @classmethod - def get_min_capability(cls) -> int: - return 90 - - @classmethod - def get_config_filenames(cls) -> List[str]: - return [] - - @classmethod - def from_config(cls, config: Dict[str, Any]) -> "FP8Config": - activation_scheme = cls.get_from_keys(config, ["activation_scheme"]) - return cls(activation_scheme=activation_scheme) - - def get_linear_method(self) -> "FP8LinearMethod": - return FP8LinearMethod(self) - - def get_scaled_act_names(self) -> List[str]: - return [] - - -class FP8LinearMethod(LinearMethodBase): - """Linear method for StaticFP8 - . - Args: - quant_config: The quantization config. 
- """ - - def __init__(self, quant_config: FP8Config): - self.quant_config = quant_config - - def create_weights( - self, - layer: torch.nn.Module, - input_size_per_partition: int, - output_partition_sizes: List[int], - input_size: int, - output_size: int, - params_dtype: torch.dtype, - **extra_weight_attrs, - ): - del input_size, output_size, params_dtype - num_logical_weights = len(output_partition_sizes) - layer.logical_widths = output_partition_sizes - - # WEIGHT - weight = Parameter(torch.empty(sum(output_partition_sizes), - input_size_per_partition, - dtype=torch.float8_e4m3fn), - requires_grad=False) - layer.register_parameter("weight", weight) - set_weight_attrs(weight, {"input_dim": 1, "output_dim": 0}) - set_weight_attrs(weight, extra_weight_attrs) - - # WEIGHT SCALE - if num_logical_weights == 1: - weight_scale = Parameter( - torch.empty(len(output_partition_sizes), dtype=torch.float32), - requires_grad=False) - - layer.register_parameter("weight_scale", weight_scale) - set_weight_attrs(weight_scale, extra_weight_attrs) - set_weight_attrs(weight_scale, { - "shard_indexer": self.scales_shard_indexer, - }) - else: - weight_scale = Parameter( - torch.empty(sum(output_partition_sizes), dtype=torch.float32), - requires_grad=False) - layer.register_parameter("weight_scale", weight_scale) - # set_weight_attrs(weight_scale, extra_weight_attrs) - set_weight_attrs(weight_scale, { - **extra_weight_attrs, - "shard_indexer": self.scales_shard_indexer_NKK, - "logical_widths": output_partition_sizes - }) - - # ACT Scale - if self.quant_config.activation_scheme == "static": - act_scale = Parameter(torch.empty(len(output_partition_sizes), - dtype=torch.float32), - requires_grad=False) - layer.register_parameter("act_scale", act_scale) - set_weight_attrs(act_scale, extra_weight_attrs) - set_weight_attrs(act_scale, { - "shard_indexer": self.scales_shard_indexer, - }) - - - def shard_id_as_int(self, shard_id: Union[str, int]) -> int: - if isinstance(shard_id, int): - return shard_id - assert isinstance(shard_id, str) - qkv_idxs = {"q": 0, "k": 1, "v": 2} - assert shard_id in qkv_idxs - return qkv_idxs[shard_id] - - def scales_shard_indexer_NKK( - self, - param: torch.Tensor, - loaded_weight: torch.Tensor, - shard_id: Union[str, int], - logical_widths, - ) -> Tuple[torch.Tensor, torch.Tensor]: - shard_id = self.shard_id_as_int(shard_id) - offset = sum(logical_widths[:shard_id]) - size = logical_widths[shard_id] - # update loaded weight with copies for broadcast. 
- loaded_weight = loaded_weight.repeat(size) - return param[offset:offset + size], loaded_weight - - def scales_shard_indexer( - self, - param: torch.Tensor, - loaded_weight: torch.Tensor, - shard_id: Union[str, int], - logical_widths, - ) -> Tuple[torch.Tensor, torch.Tensor]: - del logical_widths - return param[self.shard_id_as_int(shard_id)], loaded_weight - - def apply_weights(self, - layer: torch.nn.Module, - x: torch.Tensor, - bias: Optional[torch.Tensor] = None) -> torch.Tensor: - - m = x.shape[0] - - if self.quant_config.activation_scheme == "dynamic": - qinput, x_scale = per_tensor_quantize_dyanmic(x) - elif self.quant_config.activation_scheme == "static": - # empirically, these are all the same - x_scale = layer.act_scale.max() - qinput = per_tensor_quantize_static(x, x_scale) - - # HACK: CUTLASS hits an illegal instruction for fp8 when m == 1, - # so pad m to 2 - if m == 1: - tmp = torch.zeros(2, - qinput.shape[1], - dtype=qinput.dtype, - device="cuda") - tmp[0, :] = qinput - qinput = tmp - - out_cutlass_qkv = fused_gemm_dq_fp8( - qinput, - layer.weight.t(), - out_dtype=x.dtype, - scale_a=x_scale, - scale_b=layer.weight_scale, - bias=bias, - ) - - return out_cutlass_qkv[:m, :] - - -def per_tensor_quantize_static(tensor: torch.Tensor, - inv_scale: float) -> torch.Tensor: - """Quantize a tensor using per-tensor static scaling factor. - Args: - tensor: The input tensor. - inv_scale: The scale. - """ - # Scale and clamp the tensor to bring it to - # the representative range of float8 data type - # (as default cast is unsaturated) - finfo = torch.finfo(torch.float8_e4m3fn) - qweight = (tensor / inv_scale).clamp(min=finfo.min, max=finfo.max) - return qweight.to(torch.float8_e4m3fn) - - -def per_tensor_quantize_dyanmic( - tensor: torch.Tensor) -> Tuple[torch.Tensor, float]: - """Quantize a tensor using per-tensor dynamic scaling factor. - Args: - tensor: The input tensor. - """ - finfo = torch.finfo(torch.float8_e4m3fn) - # Calculate the scale as dtype max divided by absmax. - # Since .abs() creates a new tensor, we use aminmax to get - # the min and max first and then calculate the absmax. - min_val, max_val = tensor.aminmax() - amax = min_val.abs().max(max_val.abs()) - scale = finfo.max / amax.clamp(min=1e-12) - # scale and clamp the tensor to bring it to - # the representative range of float8 data type - # (as default cast is unsaturated) - qweight = (tensor * scale).clamp(min=finfo.min, max=finfo.max) - # Return both float8 data and the inverse scale (as float), - # as both required as inputs to torch._scaled_mm - qweight = qweight.to(torch.float8_e4m3fn) - scale = scale.float().reciprocal() - return qweight, scale diff --git a/vllm/model_executor/model_loader/weight_utils.py b/vllm/model_executor/model_loader/weight_utils.py index 54fd4f22f0eef..c061c9b8ab68c 100644 --- a/vllm/model_executor/model_loader/weight_utils.py +++ b/vllm/model_executor/model_loader/weight_utils.py @@ -135,19 +135,19 @@ def get_quant_config(model_config: ModelConfig, else: hf_folder = model_name_or_path - config_files = glob.glob(os.path.join(hf_folder, "*.json")) possible_config_filenames = quant_cls.get_config_filenames() + # If the quantization config is not found, use the default config. + if not possible_config_filenames: + return quant_cls() + + config_files = glob.glob(os.path.join(hf_folder, "*.json")) + quant_config_files = [ f for f in config_files if any( f.endswith(x) for x in possible_config_filenames) ] - # If the quantization config is optional and not provided, use the - # default config. 
- if not quant_cls.require_config_file() and not quant_config_files: - return quant_cls() - if len(quant_config_files) == 0: raise ValueError( f"Cannot find the config file for {model_config.quantization}") diff --git a/vllm/model_executor/models/mixtral.py b/vllm/model_executor/models/mixtral.py index 2f7aca2c3b0e9..3c9b7da426635 100644 --- a/vllm/model_executor/models/mixtral.py +++ b/vllm/model_executor/models/mixtral.py @@ -39,7 +39,6 @@ ReplicatedLinear, RowParallelLinear) from vllm.model_executor.layers.logits_processor import LogitsProcessor -from vllm.model_executor.layers.quantization.fp8_serialized import FP8LinearMethod from vllm.model_executor.layers.quantization.fp8 import (Fp8LinearMethod, per_tensor_quantize) from vllm.model_executor.layers.rotary_embedding import get_rope From 1a4fd8ab466e0cff07fad700d3686522fcf37c65 Mon Sep 17 00:00:00 2001 From: Robert Shaw Date: Sat, 27 Apr 2024 12:10:06 +0000 Subject: [PATCH 61/90] static correctness --- .../model_executor/layers/quantization/fp8.py | 87 +++++++------------ 1 file changed, 30 insertions(+), 57 deletions(-) diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py index 29637704d5e4d..126a530dbe4e2 100644 --- a/vllm/model_executor/layers/quantization/fp8.py +++ b/vllm/model_executor/layers/quantization/fp8.py @@ -96,29 +96,26 @@ def create_weights( # WEIGHT weight_dtype = torch.float8_e4m3fn if self.quant_config.is_serialized else params_dtype - weight = Parameter(torch.empty(output_size_per_partition, - input_size_per_partition, - dtype=weight_dtype), - requires_grad=False) + weight = Parameter( + torch.empty(output_size_per_partition, + input_size_per_partition, + dtype=weight_dtype), + requires_grad=False) layer.register_parameter("weight", weight) set_weight_attrs(weight, { **extra_weight_attrs, - "input_dim": 1, - "output_dim": 0, + "input_dim": 1, "output_dim": 0, }) # WEIGHT SCALE - weight_scale = Parameter(torch.empty( - len(output_partition_sizes), - dtype=torch.float32, - ), - requires_grad=False) + weight_scale = Parameter( + torch.empty(len(output_partition_sizes), dtype=torch.float32), + requires_grad=False) layer.register_parameter("weight_scale", weight_scale) - set_weight_attrs( - weight_scale, { - **extra_weight_attrs, - "shard_indexer": self.scales_shard_indexer, - }) + set_weight_attrs(weight_scale, { + **extra_weight_attrs, + "shard_indexer": self.scales_shard_indexer, + }) # ACTIVATION SCALE if self.quant_config.activation_scheme == "static": @@ -187,52 +184,28 @@ def apply_weights(self, else: qinput, x_scale = per_tensor_quantize_dynamic(x) - # # TODO: Inefficient loop over each shard since there is a per-tensor - # # scale for each shard. - # # To be replaced by cutlass gemm with epilogue fusion for performance. 
- # output = torch.zeros(x.shape[0], - # layer.weight.shape[0], - # dtype=x.dtype, - # device="cuda") - # start_offset = 0 - # for _, (logical_width, w_scale) in enumerate( - # zip(layer.logical_widths, layer.weight_scale)): - # end_offset = start_offset + logical_width - - # cuda_compute_capability = torch.cuda.get_device_capability() - # if cuda_compute_capability >= (9, 0): - # out, _ = torch._scaled_mm( - # qinput, - # layer.weight[start_offset:end_offset, :].t(), - # out_dtype=x.dtype, - # scale_a=x_scale, - # scale_b=w_scale, - # ) - # else: - # out = torch.nn.functional.linear( - # qinput.to(x.dtype) * x_scale.to(x.dtype), - # layer.weight[start_offset:end_offset, :].to(x.dtype) * w_scale.to(x.dtype), - # ) - - # output[:, start_offset:end_offset] = out - # start_offset = end_offset - - w_scale = layer.weight_scale.max() - - cuda_compute_capability = torch.cuda.get_device_capability() - if cuda_compute_capability >= (9, 0): - output, _ = torch._scaled_mm( + # TODO: Inefficient loop over each shard since there is a per-tensor + # scale for each shard. + # To be replaced by cutlass gemm with epilogue fusion for performance. + output = torch.zeros(x.shape[0], + layer.weight.shape[0], + dtype=x.dtype, + device="cuda") + start_offset = 0 + for _, (logical_width, w_scale) in enumerate( + zip(layer.logical_widths, layer.weight_scale)): + end_offset = start_offset + logical_width + + out, _ = torch._scaled_mm( qinput, - layer.weight.t(), + layer.weight[start_offset:end_offset, :].t(), out_dtype=x.dtype, scale_a=x_scale, scale_b=w_scale, ) - else: - output = torch.nn.functional.linear( - qinput.to(x.dtype) * x_scale.to(x.dtype), - layer.weight.to(x.dtype) * w_scale.to(x.dtype), - ) + + output[:, start_offset:end_offset] = out + start_offset = end_offset if bias is not None: output = output + bias From e48c981cfaa15280be0f7257f210232233faf8f3 Mon Sep 17 00:00:00 2001 From: Robert Shaw Date: Sat, 27 Apr 2024 13:29:26 +0000 Subject: [PATCH 62/90] static fp8 loading --- run_fp8.py | 9 +- .../model_executor/layers/quantization/fp8.py | 182 ++++++++---------- 2 files changed, 85 insertions(+), 106 deletions(-) diff --git a/run_fp8.py b/run_fp8.py index 0ee0a69af91ad..fdea9e66d8b45 100644 --- a/run_fp8.py +++ b/run_fp8.py @@ -4,7 +4,7 @@ from vllm import LLM -choices = ["llama-static", "mistral-static", "mistral-dynamic", "mixtral-static"] +choices = ["llama-static", "mistral-static", "mistral-dynamic", "mixtral-static", "tinyllama-fp16"] parser = argparse.ArgumentParser() parser.add_argument("--type", choices=choices) @@ -20,18 +20,21 @@ model_name = "nm-testing/mistral-fp8-dynamic" elif args.type == 'mixtral-static': model_name = "nm-testing/Mixtral-8x7B-Instruct-v0.1-FP8" + elif args.type == 'tinyllama-fp16': + model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0" else: raise ValueError(f"--type should be in {choices}") model = LLM(model_name, enforce_eager=True, - max_model_len=1024) + max_model_len=1024, + quantization="fp8") tokenizer = AutoTokenizer.from_pretrained(model_name) prompt = tokenizer.apply_chat_template([{ "role": "user", - "content": "What is your name" + "content": "What is open source software?" 
}], tokenize=False, add_generation_prompt=True) print(f"----- Prompt: {prompt}") diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py index 126a530dbe4e2..68a29dc334142 100644 --- a/vllm/model_executor/layers/quantization/fp8.py +++ b/vllm/model_executor/layers/quantization/fp8.py @@ -4,6 +4,7 @@ from torch.nn import Module from torch.nn.parameter import Parameter +from vllm import _custom_ops as ops from vllm.model_executor.layers.linear import (LinearMethodBase, set_weight_attrs) from vllm.model_executor.layers.quantization.base_config import ( @@ -37,7 +38,7 @@ def get_min_capability(cls) -> int: # TODO: PyTorch 2.3.0+ is required to run FP8 on # SM 89 (e.g. Ada) GPUs. Specifically, this PR has to # be included: https://github.com/pytorch/pytorch/pull/118881 - return 89 + return 90 @classmethod def get_config_filenames(cls) -> List[str]: @@ -90,10 +91,11 @@ def create_weights( **extra_weight_attrs, ): del input_size, output_size - - layer.logical_widths = output_partition_sizes output_size_per_partition = sum(output_partition_sizes) + layer.process_after_load = True + layer.logical_widths = output_partition_sizes + # WEIGHT weight_dtype = torch.float8_e4m3fn if self.quant_config.is_serialized else params_dtype weight = Parameter( @@ -107,28 +109,31 @@ def create_weights( "input_dim": 1, "output_dim": 0, }) - # WEIGHT SCALE - weight_scale = Parameter( - torch.empty(len(output_partition_sizes), dtype=torch.float32), - requires_grad=False) - layer.register_parameter("weight_scale", weight_scale) - set_weight_attrs(weight_scale, { - **extra_weight_attrs, - "shard_indexer": self.scales_shard_indexer, - }) - - # ACTIVATION SCALE - if self.quant_config.activation_scheme == "static": - act_scale = Parameter(torch.empty(len(output_partition_sizes), - dtype=torch.float32), - requires_grad=False) - layer.register_parameter("act_scale", act_scale) - set_weight_attrs(act_scale, { + # SCALES + # We only need to load scales if the model is serialized FP8. + # Otherwise, scale creation is delayed until `process_weights_after_loading`. + if self.quant_config.is_serialized: + # WEIGHT SCALE + weight_scale = Parameter( + torch.empty(len(output_partition_sizes), dtype=torch.float32), + requires_grad=False) + layer.register_parameter("weight_scale", weight_scale) + set_weight_attrs(weight_scale, { **extra_weight_attrs, - "shard_indexer": - self.scales_shard_indexer, + "shard_indexer": self.scales_shard_indexer, }) + # ACTIVATION SCALE + if self.quant_config.activation_scheme == "static": + act_scale = Parameter( + torch.empty(len(output_partition_sizes), dtype=torch.float32), + requires_grad=False) + layer.register_parameter("act_scale", act_scale) + set_weight_attrs(act_scale, { + **extra_weight_attrs, + "shard_indexer": self.scales_shard_indexer, + }) + def shard_id_as_int(self, shard_id: Union[str, int]) -> int: if isinstance(shard_id, int): return shard_id @@ -152,101 +157,72 @@ def process_weights_after_loading(self, layer: Module) -> None: # only linear layers invoke "create_weights". So we check # whether "weight_scale" is registered to determine # whether the layer is a linear layer that requires quantization. 
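For intuition, here is a standalone sketch of how shard_id_as_int and the scales shard indexer above cooperate when a fused QKV layer loads one checkpoint scale per logical shard (hypothetical scale values, plain PyTorch):

    import torch

    qkv_idxs = {"q": 0, "k": 1, "v": 2}
    weight_scale = torch.empty(3, dtype=torch.float32)  # one slot per logical weight

    # Hypothetical per-shard scales as they would arrive from the checkpoint.
    for shard_id, ckpt_scale in [("q", 0.011), ("k", 0.007), ("v", 0.015)]:
        param = weight_scale[qkv_idxs[shard_id]]  # 0-dim view into the fused parameter
        param.copy_(torch.tensor(ckpt_scale))

    print(weight_scale)  # tensor([0.0110, 0.0070, 0.0150])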
-        if not hasattr(layer, "weight_scale"):
+        if not hasattr(layer, "process_after_load") or not layer.process_after_load:
             return
 
-        # If we loaded in an FP8 checkpoint, we can skip weight quantization
-        if self.quant_config.is_serialized:
-            # torch._scaled_mm requires column-major in the second
-            # input (weight), so we transpose the quantized weight.
-            # TODO
+        # If the model was not serialized, quantize the weights.
+        if not self.quant_config.is_serialized:
+            qweight, weight_scale = ops.scaled_fp8_quant(layer.weight, scale=None)
+            layer.weight = Parameter(qweight, requires_grad=False)
+            layer.weight_scale = Parameter(weight_scale, requires_grad=False)
+            layer.logical_widths = None
+            layer.act_scale = None
             return
 
-        qweight, weight_scale = per_tensor_quantize_dynamic(layer.weight)
-        # torch._scaled_mm requires column-major in the second
-        # input (weight), so we transpose the quantized weight.
-        # TODO
-        # layer.weight = Parameter(qweight.t(), requires_grad=False)
-        layer.weight = Parameter(qweight, requires_grad=False)
-        weight_scales = torch.tensor(
-            [weight_scale for _ in layer.logical_widths], dtype=torch.float32)
-        layer.weight_scale.data.copy_(weight_scales)
+        # If the model is serialized, cleanup the weight_scales / act_scales.
+        else:
+            if len(layer.logical_widths) == 1:
+                layer.weight_scale = Parameter(layer.weight_scale.max(), requires_grad=False)
+                layer.logical_widths = None
 
+            # ACT_SCALE
+            if self.quant_config.activation_scheme == "dynamic":
+                layer.act_scale = None
+            elif self.quant_config.activation_scheme == "static":
+                # Act_scale for each logical input is the same, so take max().
+                layer.act_scale = Parameter(layer.act_scale.max(), requires_grad=False)
+            else:
+                raise ValueError(f"Unknown activation_scheme {self.quant_config.activation_scheme}")
 
     def apply_weights(self,
                       layer: torch.nn.Module,
                       x: torch.Tensor,
                       bias: Optional[torch.Tensor] = None) -> torch.Tensor:
-
-        if self.quant_config.activation_scheme == "static":
-            # Empirically, these are all the same
-            x_scale = layer.act_scale.max()
-            qinput = per_tensor_quantize_static(x, x_scale)
-        else:
-            qinput, x_scale = per_tensor_quantize_dynamic(x)
-
-        # TODO: Inefficient loop over each shard since there is a per-tensor
-        # scale for each shard.
-        # To be replaced by cutlass gemm with epilogue fusion for performance.
-        output = torch.zeros(x.shape[0],
-                             layer.weight.shape[0],
-                             dtype=x.dtype,
-                             device="cuda")
-        start_offset = 0
-        for _, (logical_width, w_scale) in enumerate(
-                zip(layer.logical_widths, layer.weight_scale)):
-            end_offset = start_offset + logical_width
-
-            out, _ = torch._scaled_mm(
-                qinput,
-                layer.weight[start_offset:end_offset, :].t(),
-                out_dtype=x.dtype,
-                scale_a=x_scale,
-                scale_b=w_scale,
-            )
-
-            output[:, start_offset:end_offset] = out
-            start_offset = end_offset
+        # ops.scaled_fp8_quant supports both dynamic and static quant.
+        # If dynamic, layer.act_scale is None and x_scale computed from x.
+        # If static, layer.act_scale is scalar and x_scale set to act_scale.
+        qinput, x_scale = ops.scaled_fp8_quant(x, layer.act_scale)
+
+        # Case 1: we have one single scale for N logical weights.
+        if layer.logical_widths is None:
+            output, _ = torch._scaled_mm(
+                qinput,
+                layer.weight.t(),
+                out_dtype=x.dtype,
+                scale_a=x_scale,
+                scale_b=layer.weight_scale,
+            )
+
+        # Case 2: We have N weight_scales for N logical weights.
+        # Current: inefficient for loop to apply each logical GEMM_DQ.
+        # TODO: replace with cutlass gemm_dq with epilogue fusion.
+ else: + output = torch.empty(x.shape[0], layer.weight.shape[0], + dtype=x.dtype, device="cuda") + start = 0 + # Loop over the N logical shards. + for logical_width, w_scale in zip(layer.logical_widths, layer.weight_scale): + end = start + logical_width + out, _ = torch._scaled_mm( + qinput, + layer.weight[start:end, :].t(), + out_dtype=x.dtype, + scale_a=x_scale, + scale_b=w_scale, + ) + output[:, start:end] = out + start = end if bias is not None: - output = output + bias + output.add_(bias) return output - - -def per_tensor_quantize_static(tensor: torch.Tensor, - inv_scale: float) -> torch.Tensor: - """Quantize a tensor using per-tensor static scaling factor. - Args: - tensor: The input tensor. - inv_scale: The scale. - """ - # Scale and clamp the tensor to bring it to - # the representative range of float8 data type - # (as default cast is unsaturated) - finfo = torch.finfo(torch.float8_e4m3fn) - qweight = (tensor / inv_scale).clamp(min=finfo.min, max=finfo.max) - return qweight.to(torch.float8_e4m3fn) - - -def per_tensor_quantize_dynamic( - tensor: torch.Tensor) -> Tuple[torch.Tensor, float]: - """Quantize a tensor using per-tensor dynamic scaling factor. - Args: - tensor: The input tensor. - """ - finfo = torch.finfo(torch.float8_e4m3fn) - # Calculate the scale as dtype max divided by absmax. - # Since .abs() creates a new tensor, we use aminmax to get - # the min and max first and then calculate the absmax. - min_val, max_val = tensor.aminmax() - amax = min_val.abs().max(max_val.abs()) - scale = finfo.max / amax.clamp(min=1e-12) - # scale and clamp the tensor to bring it to - # the representative range of float8 data type - # (as default cast is unsaturated) - qweight = (tensor * scale).clamp(min=finfo.min, max=finfo.max) - # Return both float8 data and the inverse scale (as float), - # as both required as inputs to torch._scaled_mm - qweight = qweight.to(torch.float8_e4m3fn) - scale = scale.float().reciprocal() - return qweight, scale From 02f683e6f4139107d30b088aa6a40da773a14751 Mon Sep 17 00:00:00 2001 From: Robert Shaw Date: Sat, 27 Apr 2024 13:56:29 +0000 Subject: [PATCH 63/90] working for dense models --- run_fp8.py | 10 ++++- .../model_executor/layers/quantization/fp8.py | 38 +++++++++++++++---- 2 files changed, 38 insertions(+), 10 deletions(-) diff --git a/run_fp8.py b/run_fp8.py index fdea9e66d8b45..507f9972a740f 100644 --- a/run_fp8.py +++ b/run_fp8.py @@ -4,7 +4,12 @@ from vllm import LLM -choices = ["llama-static", "mistral-static", "mistral-dynamic", "mixtral-static", "tinyllama-fp16"] +choices = ["llama-static", + "mistral-static", + "mistral-dynamic", + "mixtral-static", + "tinyllama-fp16", + "qwen-fp16"] parser = argparse.ArgumentParser() parser.add_argument("--type", choices=choices) @@ -22,6 +27,8 @@ model_name = "nm-testing/Mixtral-8x7B-Instruct-v0.1-FP8" elif args.type == 'tinyllama-fp16': model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0" + elif args.type == 'qwen-fp16': + model_name = "Qwen/CodeQwen1.5-7B-Chat" else: raise ValueError(f"--type should be in {choices}") @@ -39,6 +46,5 @@ print(f"----- Prompt: {prompt}") outputs = model.generate(prompt) - print(outputs) generation = outputs[0].outputs[0].text print(f"----- Generation: {generation}") diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py index 68a29dc334142..f291bc2ff9cad 100644 --- a/vllm/model_executor/layers/quantization/fp8.py +++ b/vllm/model_executor/layers/quantization/fp8.py @@ -160,25 +160,40 @@ def process_weights_after_loading(self, 
layer: Module) -> None:
         if not hasattr(layer, "process_after_load") or not layer.process_after_load:
             return
 
-        # If the model was not serialized, quantize the weights.
+        # If the checkpoint is fp16/bf16 (not serialized fp8), quantize the weights.
         if not self.quant_config.is_serialized:
             qweight, weight_scale = ops.scaled_fp8_quant(layer.weight, scale=None)
-            layer.weight = Parameter(qweight, requires_grad=False)
+            layer.weight = Parameter(qweight.t(), requires_grad=False)
             layer.weight_scale = Parameter(weight_scale, requires_grad=False)
             layer.logical_widths = None
             layer.act_scale = None
             return
 
-        # If the model is serialized, cleanup the weight_scales / act_scales.
-        else:
+        # If the checkpoint is serialized fp8, cleanup state_dict --> apply_weights.
+        # TODO: this will be cleaned up once we have the cutlass kernels.
+        else:
+            # WEIGHT
+            # Transpose weight for passing to torch._scaled_mm
+            weight = layer.weight
+            layer.weight = Parameter(weight.t(), requires_grad=False)
+
+            # WEIGHT_SCALE
+            # If we only have one logical shard, avoid the for loop in apply weights.
+            # TODO: once we have the cutlass_gemm, this will be removed.
             if len(layer.logical_widths) == 1:
                 layer.weight_scale = Parameter(layer.weight_scale.max(), requires_grad=False)
                 layer.logical_widths = None
 
             # ACT_SCALE
+            # Dynamic: set to None (required input to ops.scaled_fp8_quant).
+            # Static: set to max of the act_scales (since they are equal to each other).
             if self.quant_config.activation_scheme == "dynamic":
                 layer.act_scale = None
             elif self.quant_config.activation_scheme == "static":
+                if not all_close_1d(layer.act_scale):
+                    raise ValueError(
+                        "All the act_scales for the logical weights of a layer "
+                        f"must be equal. But got {layer.act_scale}")
-                # Act_scale for each logical input is the same, so take max().
                 layer.act_scale = Parameter(layer.act_scale.max(), requires_grad=False)
             else:
                 raise ValueError(f"Unknown activation_scheme {self.quant_config.activation_scheme}")
@@ -196,7 +211,7 @@ def apply_weights(self,
         if layer.logical_widths is None:
             output, _ = torch._scaled_mm(
                 qinput,
-                layer.weight.t(),
+                layer.weight,
                 out_dtype=x.dtype,
                 scale_a=x_scale,
                 scale_b=layer.weight_scale,
             )
 
@@ -206,7 +221,7 @@ def apply_weights(self,
         # Current: inefficient for loop to apply each logical GEMM_DQ.
         # TODO: replace with cutlass gemm_dq with epilogue fusion.
         else:
-            output = torch.empty(x.shape[0], layer.weight.shape[0],
+            output = torch.empty(x.shape[0], layer.weight.shape[1],
                                  dtype=x.dtype, device="cuda")
             start = 0
             # Loop over the N logical shards.
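For reference, a self-contained sketch of the per-shard GEMM-dequant pattern these hunks converge on (hypothetical shapes and unit scales; assumes an FP8-capable GPU and a PyTorch build of this era, where torch._scaled_mm returns an (output, amax) tuple and expects a column-major second operand with dimensions divisible by 16):

    import torch

    M, K = 4, 64
    logical_widths = [32, 32]  # e.g. two fused output shards

    x = torch.randn(M, K, device="cuda")
    w = torch.randn(sum(logical_widths), K, device="cuda")

    qinput = x.to(torch.float8_e4m3fn)
    qweight = w.to(torch.float8_e4m3fn).t()  # (K, N) column-major, as after the load-time transpose
    x_scale = torch.tensor(1.0, device="cuda")
    w_scales = [torch.tensor(1.0, device="cuda")] * len(logical_widths)

    output = torch.empty(M, sum(logical_widths), dtype=x.dtype, device="cuda")
    start = 0
    for width, w_scale in zip(logical_widths, w_scales):
        end = start + width
        out, _ = torch._scaled_mm(
            qinput,
            qweight[:, start:end],  # column slice stays column-major
            out_dtype=x.dtype,
            scale_a=x_scale,
            scale_b=w_scale,
        )
        output[:, start:end] = out
        start = end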
@@ -214,7 +229,7 @@ def apply_weights(self, end = start + logical_width out, _ = torch._scaled_mm( qinput, - layer.weight[start:end, :].t(), + layer.weight[:, start:end], out_dtype=x.dtype, scale_a=x_scale, scale_b=w_scale, @@ -226,3 +241,10 @@ def apply_weights(self, output.add_(bias) return output + +def all_close_1d(x: torch.Tensor): + assert len(x.shape) == 1 + for i in range(x.shape[0]): + if not torch.allclose(x[0], x[i]): + return False + return True From 81b73ef23d6335476f26b493cbdd35f05ef8f005 Mon Sep 17 00:00:00 2001 From: Robert Shaw <114415538+robertgshaw2-neuralmagic@users.noreply.github.com> Date: Sat, 27 Apr 2024 09:59:58 -0400 Subject: [PATCH 64/90] Update weight_utils.py --- vllm/model_executor/model_loader/weight_utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/vllm/model_executor/model_loader/weight_utils.py b/vllm/model_executor/model_loader/weight_utils.py index c061c9b8ab68c..9995f2afe3cf7 100644 --- a/vllm/model_executor/model_loader/weight_utils.py +++ b/vllm/model_executor/model_loader/weight_utils.py @@ -147,7 +147,6 @@ def get_quant_config(model_config: ModelConfig, f for f in config_files if any( f.endswith(x) for x in possible_config_filenames) ] - if len(quant_config_files) == 0: raise ValueError( f"Cannot find the config file for {model_config.quantization}") From 58dbe0fae451024659a8f50acbf0b12b69e3f987 Mon Sep 17 00:00:00 2001 From: Robert Shaw Date: Sat, 27 Apr 2024 14:01:51 +0000 Subject: [PATCH 65/90] moving mixtral updates to separate pr --- vllm/model_executor/models/mixtral.py | 105 +++++++++++--------------- 1 file changed, 42 insertions(+), 63 deletions(-) diff --git a/vllm/model_executor/models/mixtral.py b/vllm/model_executor/models/mixtral.py index 3c9b7da426635..c5dd1a63e2f7a 100644 --- a/vllm/model_executor/models/mixtral.py +++ b/vllm/model_executor/models/mixtral.py @@ -27,6 +27,7 @@ from torch import nn from transformers import MixtralConfig +from vllm import _custom_ops as ops from vllm.attention import Attention, AttentionMetadata from vllm.config import LoRAConfig from vllm.distributed import (get_tensor_model_parallel_rank, @@ -34,13 +35,13 @@ tensor_model_parallel_all_reduce) from vllm.model_executor.layers.fused_moe import fused_moe from vllm.model_executor.layers.layernorm import RMSNorm -from vllm.model_executor.layers.linear import (LinearMethodBase, - QKVParallelLinear, +from vllm.model_executor.layers.linear import (QKVParallelLinear, ReplicatedLinear, RowParallelLinear) from vllm.model_executor.layers.logits_processor import LogitsProcessor -from vllm.model_executor.layers.quantization.fp8 import (Fp8LinearMethod, - per_tensor_quantize) +from vllm.model_executor.layers.quantization.base_config import ( + QuantizationConfig) +from vllm.model_executor.layers.quantization.fp8 import Fp8Config from vllm.model_executor.layers.rotary_embedding import get_rope from vllm.model_executor.layers.sampler import Sampler from vllm.model_executor.layers.vocab_parallel_embedding import ( @@ -69,7 +70,7 @@ def __init__( intermediate_size: int, params_dtype: Optional[torch.dtype] = None, tp_size: Optional[int] = None, - linear_method: Optional[LinearMethodBase] = None, + quant_config: Optional[QuantizationConfig] = None, ): super().__init__() self.tp_size = tp_size or get_tensor_model_parallel_world_size() @@ -79,8 +80,7 @@ def __init__( self.intermediate_size = intermediate_size // self.tp_size # FIXME(pcmoritz): Make this more general to support different # quantization schemes - self.use_fp8 = (isinstance(linear_method, 
Fp8LinearMethod) or - isinstance(linear_method, FP8LinearMethod)) + self.use_fp8 = isinstance(quant_config, Fp8Config) if params_dtype is None: params_dtype = torch.get_default_dtype() @@ -90,23 +90,20 @@ def __init__( self.num_total_experts, bias=False, params_dtype=self.params_dtype, - linear_method=None) - - if self.use_fp8: - params_dtype = torch.float8_e4m3fn + quant_config=None) self.ws = nn.Parameter( torch.empty(self.num_total_experts, 2 * self.intermediate_size, self.hidden_size, device="cuda", - dtype=params_dtype)) + dtype=self.params_dtype)) self.w2s = nn.Parameter( torch.empty(self.num_total_experts, self.hidden_size, self.intermediate_size, device="cuda", - dtype=params_dtype)) + dtype=self.params_dtype)) set_weight_attrs(self.ws, { "weight_loader": self.weight_loader, @@ -124,23 +121,15 @@ def __init__( torch.ones( self.num_total_experts, device="cuda", dtype=torch.float32), requires_grad=False) if self.use_fp8 else None - - set_weight_attrs(self.ws_scale, { - "weight_loader": self.weight_loader, - }) - set_weight_attrs(self.w2s_scale, { - "weight_loader": self.weight_loader, - }) # Scaling factors for FP8 activations need_act_scales = (self.use_fp8 - and linear_method.quant_config.activation_scheme - == "static") + and quant_config.activation_scheme == "static") self.as_scale = nn.Parameter( - torch.zeros(self.num_total_experts, device="cuda", dtype=torch.float32), + torch.zeros(1, device="cuda", dtype=torch.float32), requires_grad=False) if need_act_scales else None self.a2s_scale = nn.Parameter( - torch.zeros(self.num_total_experts, device="cuda", dtype=torch.float32), + torch.zeros(1, device="cuda", dtype=torch.float32), requires_grad=False) if need_act_scales else None if need_act_scales: @@ -163,29 +152,27 @@ def weight_loader(self, param: nn.Parameter, loaded_weight: torch.Tensor, param_data[expert_id, shard_size:2 * shard_size, :] = loaded_weight[shard, :] if weight_name.endswith("w2.weight"): - param_data[expert_id] = loaded_weight[:, shard] - if "act_scale" in weight_name or "weight_scale" in weight_name: - param_data[expert_id] = loaded_weight - - # def process_weights_after_loading(self): - # if self.use_fp8: - # ws = torch.empty_like(self.ws.data, dtype=torch.float8_e4m3fn) - # w2s = torch.empty_like(self.w2s.data, dtype=torch.float8_e4m3fn) - # for expert in range(self.num_total_experts): - # ws[expert, :, :], self.ws_scale[expert] = per_tensor_quantize( - # self.ws.data[expert, :, :]) - # w2s[expert, :, :], self.w2s_scale[ - # expert] = per_tensor_quantize(self.w2s.data[expert, :, :]) - # self.ws = nn.Parameter(ws, requires_grad=False) - # self.w2s = nn.Parameter(w2s, requires_grad=False) + param_data[expert_id, :, :] = loaded_weight[:, shard] + if "act_scale" in weight_name: + param_data[:] = param_data[:].max(loaded_weight) + + def process_weights_after_loading(self): + if self.use_fp8: + ws = torch.empty_like(self.ws.data, dtype=torch.float8_e4m3fn) + w2s = torch.empty_like(self.w2s.data, dtype=torch.float8_e4m3fn) + for expert in range(self.num_total_experts): + ws[expert, :, :], self.ws_scale[expert] = ops.scaled_fp8_quant( + self.ws.data[expert, :, :]) + w2s[expert, :, :], self.w2s_scale[ + expert] = ops.scaled_fp8_quant(self.w2s.data[expert, :, :]) + self.ws = nn.Parameter(ws, requires_grad=False) + self.w2s = nn.Parameter(w2s, requires_grad=False) def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: num_tokens, hidden_size = hidden_states.shape hidden_states = hidden_states.view(-1, self.hidden_size) # router_logits: (num_tokens, n_experts) 
router_logits, _ = self.gate(hidden_states) - - # TODO: fused MoE kernel might want to take different scales for each expert? final_hidden_states = fused_moe(hidden_states, self.ws, self.w2s, @@ -196,8 +183,8 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: use_fp8=self.use_fp8, w1_scale=self.ws_scale, w2_scale=self.w2s_scale, - a1_scale=self.as_scale.max(), - a2_scale=self.a2s_scale.max()) + a1_scale=self.as_scale, + a2_scale=self.a2s_scale) if self.tp_size > 1: final_hidden_states = tensor_model_parallel_all_reduce( @@ -214,7 +201,7 @@ def __init__(self, num_kv_heads: int, max_position: int = 4096 * 32, rope_theta: float = 10000, - linear_method: Optional[LinearMethodBase] = None, + quant_config: Optional[QuantizationConfig] = None, sliding_window: Optional[int] = None) -> None: super().__init__() self.hidden_size = hidden_size @@ -239,12 +226,12 @@ def __init__(self, self.rope_theta = rope_theta self.sliding_window = sliding_window - if isinstance(linear_method, Fp8LinearMethod): + if isinstance(quant_config, Fp8Config): print_warning_once( "For Mixtral FP8 quantization, we currently do not quantize " "the attention layers until their FP8 performance is improved." ) - linear_method = None + quant_config = None self.qkv_proj = QKVParallelLinear( hidden_size, @@ -252,13 +239,13 @@ def __init__(self, self.total_num_heads, self.total_num_kv_heads, bias=False, - linear_method=linear_method, + quant_config=quant_config, ) self.o_proj = RowParallelLinear( self.total_num_heads * self.head_dim, hidden_size, bias=False, - linear_method=linear_method, + quant_config=quant_config, ) self.rotary_emb = get_rope( self.head_dim, @@ -295,7 +282,7 @@ class MixtralDecoderLayer(nn.Module): def __init__( self, config: MixtralConfig, - linear_method: Optional[LinearMethodBase] = None, + quant_config: Optional[QuantizationConfig] = None, ) -> None: super().__init__() self.hidden_size = config.hidden_size @@ -308,13 +295,13 @@ def __init__( num_kv_heads=config.num_key_value_heads, rope_theta=rope_theta, sliding_window=config.sliding_window, - linear_method=linear_method) + quant_config=quant_config) self.block_sparse_moe = MixtralMoE( num_experts=config.num_local_experts, top_k=config.num_experts_per_tok, hidden_size=config.hidden_size, intermediate_size=config.intermediate_size, - linear_method=linear_method) + quant_config=quant_config) self.input_layernorm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps) self.post_attention_layernorm = RMSNorm(config.hidden_size, @@ -354,7 +341,7 @@ class MixtralModel(nn.Module): def __init__( self, config: MixtralConfig, - linear_method: Optional[LinearMethodBase] = None, + quant_config: Optional[QuantizationConfig] = None, lora_config: Optional[LoRAConfig] = None, ) -> None: super().__init__() @@ -370,7 +357,7 @@ def __init__( org_num_embeddings=config.vocab_size, ) self.layers = nn.ModuleList([ - MixtralDecoderLayer(config, linear_method=linear_method) + MixtralDecoderLayer(config, quant_config=quant_config) for _ in range(config.num_hidden_layers) ]) self.norm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps) @@ -420,14 +407,13 @@ class MixtralForCausalLM(nn.Module): def __init__( self, config: MixtralConfig, - linear_method: Optional[LinearMethodBase] = None, + quant_config: Optional[QuantizationConfig] = None, lora_config: Optional[LoRAConfig] = None, ) -> None: super().__init__() self.config = config - self.linear_method = linear_method self.model = MixtralModel(config, - linear_method, + quant_config, lora_config=lora_config) 
self.unpadded_vocab_size = config.vocab_size if lora_config: @@ -479,13 +465,6 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): ] expert_params_mapping = [ - # These are the activation scales for the experts - # (param_name, weight_name, expert_id) - ("ws_scale" if weight_name in ["w1", "w3"] else "w2s_scale", - f"experts.{expert_id}.{weight_name}.weight_scale", expert_id) - for expert_id in range(self.config.num_local_experts) - for weight_name in ["w1", "w2", "w3"] - ] + [ # These are the weights for the experts # (param_name, weight_name, expert_id) ("ws" if weight_name in ["w1", "w3"] else "w2s", From a8d4b334d79a53dcfbcebce5a217ed7a3f49b07c Mon Sep 17 00:00:00 2001 From: Robert Shaw Date: Sat, 27 Apr 2024 14:15:56 +0000 Subject: [PATCH 66/90] make ./format pass --- run_fp8.py | 21 ++-- .../model_executor/layers/quantization/fp8.py | 117 ++++++++++-------- 2 files changed, 73 insertions(+), 65 deletions(-) diff --git a/run_fp8.py b/run_fp8.py index 507f9972a740f..9ddd687c0e8e4 100644 --- a/run_fp8.py +++ b/run_fp8.py @@ -4,12 +4,10 @@ from vllm import LLM -choices = ["llama-static", - "mistral-static", - "mistral-dynamic", - "mixtral-static", - "tinyllama-fp16", - "qwen-fp16"] +choices = [ + "llama-static", "mistral-static", "mistral-dynamic", "mixtral-static", + "tinyllama-fp16", "qwen-fp16" +] parser = argparse.ArgumentParser() parser.add_argument("--type", choices=choices) @@ -39,10 +37,13 @@ tokenizer = AutoTokenizer.from_pretrained(model_name) - prompt = tokenizer.apply_chat_template([{ - "role": "user", - "content": "What is open source software?" - }], tokenize=False, add_generation_prompt=True) + prompt = tokenizer.apply_chat_template( + [{ + "role": "user", + "content": "What is open source software?" + }], + tokenize=False, + add_generation_prompt=True) print(f"----- Prompt: {prompt}") outputs = model.generate(prompt) diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py index ddeb95abb19a1..a4debe8aec224 100644 --- a/vllm/model_executor/layers/quantization/fp8.py +++ b/vllm/model_executor/layers/quantization/fp8.py @@ -5,7 +5,7 @@ from torch.nn.parameter import Parameter from vllm import _custom_ops as ops -from vllm.model_executor.layers.linear import LinearBase, LinearMethodBase +from vllm.model_executor.layers.linear import LinearBase from vllm.model_executor.layers.quantization.base_config import ( QuantizationConfig, QuantizeMethodBase) from vllm.model_executor.utils import set_weight_attrs @@ -45,7 +45,6 @@ def get_config_filenames(cls) -> List[str]: return [] @classmethod - def from_config(cls, config: Dict[str, Any]) -> "Fp8Config": quant_method = cls.get_from_keys(config, ["quant_method"]) is_serialized = ("fp8" in quant_method) @@ -53,8 +52,7 @@ def from_config(cls, config: Dict[str, Any]) -> "Fp8Config": return cls(is_serialized=is_serialized, activation_scheme=activation_scheme) - def get_quant_method( - self, layer: torch.nn.Module) -> "Fp8LinearMethod": + def get_quant_method(self, layer: torch.nn.Module) -> "Fp8LinearMethod": if isinstance(layer, LinearBase): return Fp8LinearMethod(self) return None @@ -63,7 +61,7 @@ def get_scaled_act_names(self) -> List[str]: return [] -class Fp8LinearMethod(LinearMethodBase): +class Fp8LinearMethod(QuantizeMethodBase): """Linear method for FP8. Supports loading FP8 checkpoints with static weight scale and dynamic/scale activation scale. 
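Before the create_weights hunk below, it may help to see the shape bookkeeping in isolation: a fused layer built from several logical partitions stores one flat fp8 weight plus one fp32 scale slot per partition. A minimal sketch with made-up Llama-style sizes (the Parameter registration and set_weight_attrs plumbing of the real method are omitted):

import torch
from torch.nn import Parameter

output_partition_sizes = [11008, 11008]   # e.g. a fused gate/up projection
input_size_per_partition = 4096

# One flat fp8 weight covering every partition.
weight = Parameter(torch.empty(sum(output_partition_sizes),
                               input_size_per_partition,
                               dtype=torch.float8_e4m3fn),
                   requires_grad=False)

# One scale slot per logical partition; the shard indexer fills slot i
# when checkpoint shard i is loaded.
weight_scale = Parameter(torch.empty(len(output_partition_sizes),
                                     dtype=torch.float32),
                         requires_grad=False)

logical_widths = output_partition_sizes   # consumed later by apply()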
@@ -97,46 +95,49 @@ def create_weights( del input_size, output_size output_size_per_partition = sum(output_partition_sizes) - layer.process_after_load = True + layer.process_after_loading = True layer.logical_widths = output_partition_sizes - + # WEIGHT - weight_dtype = torch.float8_e4m3fn if self.quant_config.is_serialized else params_dtype - weight = Parameter( - torch.empty(output_size_per_partition, - input_size_per_partition, - dtype=weight_dtype), - requires_grad=False) + weight_dtype = (torch.float8_e4m3fn + if self.quant_config.is_serialized else params_dtype) + weight = Parameter(torch.empty(output_size_per_partition, + input_size_per_partition, + dtype=weight_dtype), + requires_grad=False) layer.register_parameter("weight", weight) set_weight_attrs(weight, { **extra_weight_attrs, - "input_dim": 1, "output_dim": 0, + "input_dim": 1, + "output_dim": 0, }) - # SCALES - # We only need to load scales if the model is serialized FP8. - # Otherwise, scale creation is delayed until `process_weights_after_loading`. + # If checkpoint is serialized fp8, load them. + # Otherwise, wait until process_weights_after_loading. if self.quant_config.is_serialized: # WEIGHT SCALE - weight_scale = Parameter( - torch.empty(len(output_partition_sizes), dtype=torch.float32), - requires_grad=False) + weight_scale = Parameter(torch.empty(len(output_partition_sizes), + dtype=torch.float32), + requires_grad=False) layer.register_parameter("weight_scale", weight_scale) set_weight_attrs(weight_scale, { **extra_weight_attrs, - "shard_indexer": self.scales_shard_indexer, + "shard_indexer": + self.scales_shard_indexer, }) # ACTIVATION SCALE if self.quant_config.activation_scheme == "static": - act_scale = Parameter( - torch.empty(len(output_partition_sizes), dtype=torch.float32), - requires_grad=False) + act_scale = Parameter(torch.empty(len(output_partition_sizes), + dtype=torch.float32), + requires_grad=False) layer.register_parameter("act_scale", act_scale) - set_weight_attrs(act_scale, { - **extra_weight_attrs, - "shard_indexer": self.scales_shard_indexer, - }) + set_weight_attrs( + act_scale, { + **extra_weight_attrs, + "shard_indexer": + self.scales_shard_indexer, + }) def shard_id_as_int(self, shard_id: Union[str, int]) -> int: if isinstance(shard_id, int): @@ -161,36 +162,40 @@ def process_weights_after_loading(self, layer: Module) -> None: # only linear layers invoke "create_weights". So we check # whether "weight_scale" is registered to determine # whether the layer is a linear layer that requires quantization. - if not hasattr(layer, "process_after_load") or not layer.process_after_load: + if not hasattr( + layer, + "process_after_loading") or not layer.process_after_load: return - # If the checkpoint is fp16/bf16 (not serialized fp8), quantize the weights. + # If checkpoint is fp1616 (not serialized fp8), quantize the weights. if not self.quant_config.is_serialized: - qweight, weight_scale = ops.scaled_fp8_quant(layer.weight, scale=None) + qweight, weight_scale = ops.scaled_fp8_quant(layer.weight, + scale=None) layer.weight = Parameter(qweight.t(), requires_grad=False) layer.weight_scale = Parameter(weight_scale, requires_grad=False) layer.logical_widths = None layer.act_scale = None return - # If the checkpoint is serialized fp8, cleanup state_dict --> apply_weights. - # TODO: this will be cleaned up once we have the cutlass kernels. - else: + # TODO: cutlass kernels will remove the need for much of this logic. + # If the checkpoint is serialized fp8, we already loaded quantized. 
+ # So, just cleanup the Parameters for easier use in apply() + else: # WEIGHT - # Tranpose weight for passing to torch._scaled_mm + # Transpose weight for passing to torch._scaled_mm weight = layer.weight layer.weight = Parameter(weight.t(), requires_grad=False) - + # WEIGHT_SCALE - # If we only have one logical shard, avoid the for loop in apply weights. - # TODO: once we have the cutlass_gemm, this will be removed. + # If we only have one logical shard, avoid the loop in apply(). if len(layer.logical_widths) == 1: - layer.weight_scale = Parameter(layer.weight_scale.max(), requires_grad=False) + layer.weight_scale = Parameter(layer.weight_scale.max(), + requires_grad=False) layer.logical_widths = None - + # ACT_SCALE - # Dyanmic: set to None (required input to ops.scaled_fp8_quant). - # Static: set to max of the act_scales (since they are equal to eachoter). + # Dynamic: set to None (required input to ops.scaled_fp8_quant). + # Static: set to max of the act_scales (since they are equal). if self.quant_config.activation_scheme == "dynamic": layer.act_scale = None elif self.quant_config.activation_scheme == "static": @@ -198,9 +203,11 @@ def process_weights_after_loading(self, layer: Module) -> None: raise ValueError( "All the act_scales for the logical weights of a layer " f"must be equal. But got {layer.act_scale}") - layer.act_scale = Parameter(layer.act_scale.max(), requires_grad=False) + layer.act_scale = Parameter(layer.act_scale.max(), + requires_grad=False) else: - raise ValueError(f"Unknown activation_scheme {self.quant_config.activation_scheme}") + raise ValueError( + f"Unknown scheme {self.quant_config.activation_scheme}") def apply(self, layer: torch.nn.Module, @@ -220,16 +227,18 @@ def apply(self, scale_a=x_scale, scale_b=layer.weight_scale, ) - - # TODO: replace will cutlass gemm_dq with epilogue fusion. + + # TODO: replace naive loop with cutlass gemm_dq w/ epilogue fusion. # Case 2: We have N weigth_scales for N logical weights. - # Current: inefficient for loop to apply each logical GEMM_DQ. else: - output = torch.empty(x.shape[0], layer.weight.shape[1], - dtype=x.dtype, device="cuda") + output = torch.empty(x.shape[0], + layer.weight.shape[1], + dtype=x.dtype, + device="cuda") start = 0 # Loop over the N logical shards. - for logical_width, w_scale in zip(layer.logical_widths, layer.weight_scale): + for logical_width, w_scale in zip(layer.logical_widths, + layer.weight_scale): end = start + logical_width out, _ = torch._scaled_mm( qinput, @@ -245,10 +254,8 @@ def apply(self, output.add_(bias) return output - + + def all_close_1d(x: torch.Tensor): assert len(x.shape) == 1 - for i in range(x.shape[0]): - if not torch.allclose(x[0], x[i]): - return False - return True + return all(torch.allclose(x[0], x[i]) for i in range(x.shape[0])) From 5be09702d205d033863de068d99b074ab05de761 Mon Sep 17 00:00:00 2001 From: Robert Shaw Date: Sat, 27 Apr 2024 14:46:30 +0000 Subject: [PATCH 67/90] better comments in linear.py --- vllm/model_executor/layers/linear.py | 90 ++++++++----------- .../model_executor/layers/quantization/fp8.py | 35 ++++---- 2 files changed, 51 insertions(+), 74 deletions(-) diff --git a/vllm/model_executor/layers/linear.py b/vllm/model_executor/layers/linear.py index dc0723a49f526..9606662f7fea5 100644 --- a/vllm/model_executor/layers/linear.py +++ b/vllm/model_executor/layers/linear.py @@ -240,6 +240,9 @@ def __init__( self.register_parameter("bias", None) def weight_loader(self, param: Parameter, loaded_weight: torch.Tensor): + # Special case for Fp8 scales. 
+ shard_indexer = getattr(param, "shard_indexer", None) + tp_rank = get_tensor_model_parallel_rank() output_dim = getattr(param, "output_dim", None) param_data = param.data @@ -248,10 +251,10 @@ def weight_loader(self, param: Parameter, loaded_weight: torch.Tensor): start_idx = tp_rank * shard_size loaded_weight = loaded_weight.narrow(output_dim, start_idx, shard_size) - # TODO: canon - # This is for loading scales for fp8, which have no dims. - if len(loaded_weight.shape) == 0: - loaded_weight = loaded_weight.reshape(1) + # Special case for Fp8 scales. + elif shard_indexer is not None: + param_data, loaded_weight = shard_indexer( + param_data, loaded_weight, shard_id=0) assert param_data.shape == loaded_weight.shape param_data.copy_(loaded_weight) @@ -315,21 +318,10 @@ def weight_loader(self, param_data = param.data output_dim = getattr(param, "output_dim", None) + # Special case for AQLM codebooks. is_metadata = getattr(param, "is_metadata", False) - - # TODO: document. - # TODO: sync with is_metadata. - # For loading scales. + # Special case for Fp8 scales. shard_indexer = getattr(param, "shard_indexer", None) - logical_widths = getattr(param, "logical_widths", None) - if output_dim is not None and shard_indexer is not None: - raise NotImplementedError( - "We do not currently support output_dim != None and " - "shard_indexer != None for a parameter. Please open an issue.") - if loaded_shard_id is None and shard_indexer is not None: - raise NotImplementedError( - "We do not currently support loaded_shard_id == None and " - "shard_indexer != None for a parameter. Please open an issue.") if loaded_shard_id is None: # Loaded weight is already packed. @@ -344,14 +336,13 @@ def weight_loader(self, current_shard_offset += output_size packed_dim = getattr(param, "packed_dim", None) for shard_id, shard_offset, shard_size in shard_offsets: + # Special case for Quantization. # If quantized, we need to adjust the offset and size to account # for the packing. if packed_dim == output_dim: shard_size = shard_size // param.pack_factor shard_offset = shard_offset // param.pack_factor - - # If marlin, we need to adjust the offset and size to - # account for the tiling. + # Special case for Marlin. shard_size, shard_offset = adjust_marlin_shard( param, shard_size, shard_offset) @@ -366,15 +357,14 @@ def weight_loader(self, if output_dim is not None: shard_offset = sum(self.output_sizes[:loaded_shard_id]) // tp_size shard_size = self.output_sizes[loaded_shard_id] // tp_size + # Special case for quantization. # If quantized, we need to adjust the offset and size to account # for the packing. packed_dim = getattr(param, "packed_dim", None) if packed_dim == output_dim: shard_size = shard_size // param.pack_factor shard_offset = shard_offset // param.pack_factor - - # If marlin, we need to adjust the offset and size to - # account for the tiling. + # Special case for Marlin. shard_size, shard_offset = adjust_marlin_shard( param, shard_size, shard_offset) @@ -383,19 +373,16 @@ def weight_loader(self, start_idx = tp_rank * shard_size loaded_weight = loaded_weight.narrow(output_dim, start_idx, shard_size) + # Special case for AQLM codebooks. elif is_metadata: # metadata indicates fixed size concatenated along dim 0 shard_size = loaded_weight.shape[0] shard_offset = loaded_shard_id * shard_size param_data = param_data.narrow(0, shard_offset, shard_size) - - # TODO: sync with is_metadata UX. - # If a param_shard_splitter is defined by the LinearMethod, use it. + # Special case sharding for Fp8 scales. 
elif shard_indexer is not None: - param_data, loaded_weight = shard_indexer(param_data, - loaded_weight, - loaded_shard_id, - logical_widths) + param_data, loaded_weight = shard_indexer( + param_data, loaded_weight, loaded_shard_id) else: ignore_warning = getattr(param, "ignore_warning", False) @@ -477,19 +464,10 @@ def weight_loader(self, loaded_shard_id: Optional[str] = None): param_data = param.data output_dim = getattr(param, "output_dim", None) + # Special case for AQLM codebooks. is_metadata = getattr(param, "is_metadata", False) - - # TODO: sync with is_metadata UX + # Special case for Fp8 scales. shard_indexer = getattr(param, "shard_indexer", None) - logical_widths = getattr(param, "logical_widths", None) - if output_dim is not None and shard_indexer is not None: - raise NotImplementedError( - "We do not currently support output_dim != None and " - "shard_indexer != None for a parameter. Please open an issue.") - if loaded_shard_id is None and shard_indexer is not None: - raise NotImplementedError( - "We do not currently support loaded_shard_id == None and " - "shard_indexer != None for a parameter. Please open an issue.") if loaded_shard_id is None: # Loaded weight is already packed. @@ -507,14 +485,14 @@ def weight_loader(self, ] packed_dim = getattr(param, "packed_dim", None) for shard_id, shard_offset, shard_size in shard_offsets: + # Special case for Quantized Weights. # If quantized, we need to adjust the offset and size to account # for the packing. if packed_dim == output_dim: shard_size = shard_size // param.pack_factor shard_offset = shard_offset // param.pack_factor - # If marlin, we need to adjust the offset and size to - # account for the tiling. + # Special case for Marlin. shard_size, shard_offset = adjust_marlin_shard( param, shard_size, shard_offset) @@ -536,6 +514,7 @@ def weight_loader(self, shard_offset = (self.num_heads + self.num_kv_heads) * self.head_size shard_size = self.num_kv_heads * self.head_size + # Special case for Quantized Weights. # If quantized, we need to adjust the offset and size to account # for the packing. packed_dim = getattr(param, "packed_dim", None) @@ -543,8 +522,7 @@ def weight_loader(self, shard_size = shard_size // param.pack_factor shard_offset = shard_offset // param.pack_factor - # If marlin, we need to adjust the offset and size to - # account for the tiling. + # Special case for Marlin. shard_size, shard_offset = adjust_marlin_shard( param, shard_size, shard_offset) @@ -557,19 +535,18 @@ def weight_loader(self, start_idx = shard_id * shard_size loaded_weight = loaded_weight.narrow(output_dim, start_idx, shard_size) + # Special case for for AQLM codebooks. elif is_metadata: # metadata indicates fixed size concatenated along dim 0 shard_size = loaded_weight.shape[0] shard_index = ["q", "k", "v"].index(loaded_shard_id) param_data = param_data.narrow(0, shard_index * shard_size, shard_size) - # TODO: sync with QKV - # If a param_shard_splitter is defined by the LinearMethod, use it. + # Special case for for Fp8 scales. elif shard_indexer is not None: - param_data, loaded_weight = shard_indexer(param_data, - loaded_weight, - loaded_shard_id, - logical_widths) + + param_data, loaded_weight = shard_indexer( + param_data, loaded_weight, loaded_shard_id) else: ignore_warning = getattr(param, "ignore_warning", False) if not ignore_warning: @@ -650,6 +627,9 @@ def __init__( self.register_parameter("bias", None) def weight_loader(self, param: Parameter, loaded_weight: torch.Tensor): + # Special case for Fp8 scales. 
+ shard_indexer = getattr(param, "shard_indexer", None) + tp_rank = get_tensor_model_parallel_rank() input_dim = getattr(param, "input_dim", None) param_data = param.data @@ -658,10 +638,10 @@ def weight_loader(self, param: Parameter, loaded_weight: torch.Tensor): start_idx = tp_rank * shard_size loaded_weight = loaded_weight.narrow(input_dim, start_idx, shard_size) - # TODO: canon - # This is for loading scales for fp8, which have no dims. - if len(loaded_weight.shape) == 0: - loaded_weight = loaded_weight.reshape(1) + # Special case for Fp8 scales. + elif shard_indexer is not None: + param_data, loaded_weight = shard_indexer( + param_data, loaded_weight, shard_id=0) assert param_data.shape == loaded_weight.shape param_data.copy_(loaded_weight) diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py index a4debe8aec224..37e98b84134d0 100644 --- a/vllm/model_executor/layers/quantization/fp8.py +++ b/vllm/model_executor/layers/quantization/fp8.py @@ -95,7 +95,7 @@ def create_weights( del input_size, output_size output_size_per_partition = sum(output_partition_sizes) - layer.process_after_loading = True + layer.process_after_load = True layer.logical_widths = output_partition_sizes # WEIGHT @@ -139,32 +139,29 @@ def create_weights( self.scales_shard_indexer, }) - def shard_id_as_int(self, shard_id: Union[str, int]) -> int: - if isinstance(shard_id, int): - return shard_id - assert isinstance(shard_id, str) + def scales_shard_indexer( + self, param: torch.Tensor, loaded_weight: torch.Tensor, + shard_id: Union[str, int]) -> Tuple[torch.Tensor, torch.Tensor]: qkv_idxs = {"q": 0, "k": 1, "v": 2} - assert shard_id in qkv_idxs - return qkv_idxs[shard_id] - def scales_shard_indexer( - self, - param: torch.Tensor, - loaded_weight: torch.Tensor, - shard_id: Union[str, int], - logical_widths: List[int], - ) -> Tuple[torch.Tensor, torch.Tensor]: - del logical_widths - return param[self.shard_id_as_int(shard_id)], loaded_weight + if isinstance(shard_id, int): + pass + elif isinstance(shard_id, str): + if shard_id not in qkv_idxs: + raise ValueError(f"Unknown shard_id: {shard_id}") + shard_id = qkv_idxs[shard_id] + else: + ValueError(f"Shard id must be int or str but got {type(shard_id)}") + + return param[shard_id], loaded_weight def process_weights_after_loading(self, layer: Module) -> None: # Although the quant_method is propagated to all layers, # only linear layers invoke "create_weights". So we check # whether "weight_scale" is registered to determine # whether the layer is a linear layer that requires quantization. - if not hasattr( - layer, - "process_after_loading") or not layer.process_after_load: + if (not hasattr(layer, "process_after_load") or + not layer.process_after_load): return # If checkpoint is fp1616 (not serialized fp8), quantize the weights. 
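To make the new indexer contract concrete, here is a condensed, self-contained version (the real method lives on Fp8LinearMethod and also raises on unknown shard ids): each fused shard id, "q"/"k"/"v" or an integer for merged column layers, selects one slot of the flat scale tensor, and the loader copies the checkpoint scalar into that slot.

from typing import Tuple, Union

import torch

def scales_shard_indexer(
        param: torch.Tensor, loaded_weight: torch.Tensor,
        shard_id: Union[str, int]) -> Tuple[torch.Tensor, torch.Tensor]:
    qkv_idxs = {"q": 0, "k": 1, "v": 2}
    if isinstance(shard_id, str):
        shard_id = qkv_idxs[shard_id]
    return param[shard_id], loaded_weight

act_scale = torch.zeros(3)                 # one slot each for q, k, v
param_data, loaded = scales_shard_indexer(act_scale, torch.tensor(0.02), "k")
param_data.copy_(loaded)                   # act_scale is now [0.00, 0.02, 0.00]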
From ef7992bd018d5aa50a4eff360e1aacccb7ea98f2 Mon Sep 17 00:00:00 2001 From: Robert Shaw Date: Sat, 27 Apr 2024 14:47:50 +0000 Subject: [PATCH 68/90] better comments in linear.py --- vllm/model_executor/layers/quantization/fp8.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py index 37e98b84134d0..ba83719c73624 100644 --- a/vllm/model_executor/layers/quantization/fp8.py +++ b/vllm/model_executor/layers/quantization/fp8.py @@ -175,8 +175,8 @@ def process_weights_after_loading(self, layer: Module) -> None: return # TODO: cutlass kernels will remove the need for much of this logic. - # If the checkpoint is serialized fp8, we already loaded quantized. - # So, just cleanup the Parameters for easier use in apply() + # If the checkpoint is serialized fp8, we already loaded quantized, + # so, just cleanup the Parameters for easier use in apply(). else: # WEIGHT # Transpose weight for passing to torch._scaled_mm From 06677919a4fb2f5a4bcbd42655bf8c6a93c0035d Mon Sep 17 00:00:00 2001 From: Robert Shaw Date: Sat, 27 Apr 2024 15:04:52 +0000 Subject: [PATCH 69/90] fixed opt-125 --- run_fp8.py | 24 +++++++++++++----------- vllm/model_executor/models/opt.py | 3 ++- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/run_fp8.py b/run_fp8.py index 9ddd687c0e8e4..23298999a3050 100644 --- a/run_fp8.py +++ b/run_fp8.py @@ -6,7 +6,7 @@ choices = [ "llama-static", "mistral-static", "mistral-dynamic", "mixtral-static", - "tinyllama-fp16", "qwen-fp16" + "opt-static", "tinyllama-fp16", "qwen-fp16" ] parser = argparse.ArgumentParser() @@ -21,8 +21,8 @@ model_name = "nm-testing/mistral-fp8-static" elif args.type == "mistral-dynamic": model_name = "nm-testing/mistral-fp8-dynamic" - elif args.type == 'mixtral-static': - model_name = "nm-testing/Mixtral-8x7B-Instruct-v0.1-FP8" + elif args.type == "opt-static": + model_name = "nm-testing/opt-125m-fp8-static" elif args.type == 'tinyllama-fp16': model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0" elif args.type == 'qwen-fp16': @@ -36,14 +36,16 @@ quantization="fp8") tokenizer = AutoTokenizer.from_pretrained(model_name) - - prompt = tokenizer.apply_chat_template( - [{ - "role": "user", - "content": "What is open source software?" - }], - tokenize=False, - add_generation_prompt=True) + if tokenizer.chat_template is not None: + prompt = tokenizer.apply_chat_template( + [{ + "role": "user", + "content": "What is open source software?" 
+ }], + tokenize=False, + add_generation_prompt=True) + else: + prompt = "The best thing about" print(f"----- Prompt: {prompt}") outputs = model.generate(prompt) diff --git a/vllm/model_executor/models/opt.py b/vllm/model_executor/models/opt.py index 838a2f0adc4d1..133ee79260620 100644 --- a/vllm/model_executor/models/opt.py +++ b/vllm/model_executor/models/opt.py @@ -130,7 +130,7 @@ def __init__( bias=config.enable_bias, quant_config=quant_config, ) - quant_config = getattr(quant_config, "quant_config", None) + # quant_config = getattr(quant_config, "quant_config", None) self.activation_fn = get_act_fn(config.activation_function, quant_config, config.ffn_dim) self.fc2 = RowParallelLinear( @@ -323,6 +323,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): ("qkv_proj", "v_proj", "v"), ] params_dict = dict(self.named_parameters(remove_duplicate=False)) + print(params_dict.keys()) for name, loaded_weight in weights: if "lm_head.weight" in name: continue From d8adf1455f5b1e62e62f383810bd6ad812133d98 Mon Sep 17 00:00:00 2001 From: Robert Shaw Date: Sat, 27 Apr 2024 15:05:08 +0000 Subject: [PATCH 70/90] removed run_fp8.py --- run_fp8.py | 53 ----------------------------------------------------- 1 file changed, 53 deletions(-) delete mode 100644 run_fp8.py diff --git a/run_fp8.py b/run_fp8.py deleted file mode 100644 index 23298999a3050..0000000000000 --- a/run_fp8.py +++ /dev/null @@ -1,53 +0,0 @@ -import argparse - -from transformers import AutoTokenizer - -from vllm import LLM - -choices = [ - "llama-static", "mistral-static", "mistral-dynamic", "mixtral-static", - "opt-static", "tinyllama-fp16", "qwen-fp16" -] - -parser = argparse.ArgumentParser() -parser.add_argument("--type", choices=choices) - -if __name__ == "__main__": - args = parser.parse_args() - - if args.type == "llama-static": - model_name = "nm-testing/Meta-Llama-3-8B-Instruct-FP8" - elif args.type == "mistral-static": - model_name = "nm-testing/mistral-fp8-static" - elif args.type == "mistral-dynamic": - model_name = "nm-testing/mistral-fp8-dynamic" - elif args.type == "opt-static": - model_name = "nm-testing/opt-125m-fp8-static" - elif args.type == 'tinyllama-fp16': - model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0" - elif args.type == 'qwen-fp16': - model_name = "Qwen/CodeQwen1.5-7B-Chat" - else: - raise ValueError(f"--type should be in {choices}") - - model = LLM(model_name, - enforce_eager=True, - max_model_len=1024, - quantization="fp8") - - tokenizer = AutoTokenizer.from_pretrained(model_name) - if tokenizer.chat_template is not None: - prompt = tokenizer.apply_chat_template( - [{ - "role": "user", - "content": "What is open source software?" 
- }], - tokenize=False, - add_generation_prompt=True) - else: - prompt = "The best thing about" - print(f"----- Prompt: {prompt}") - - outputs = model.generate(prompt) - generation = outputs[0].outputs[0].text - print(f"----- Generation: {generation}") From 9bb1a2b9ada6ba2586e8991ff7e81cf764385e4e Mon Sep 17 00:00:00 2001 From: Robert Shaw Date: Sat, 27 Apr 2024 15:06:47 +0000 Subject: [PATCH 71/90] format --- vllm/model_executor/layers/linear.py | 20 +++++++++++-------- .../model_executor/layers/quantization/fp8.py | 4 ++-- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/vllm/model_executor/layers/linear.py b/vllm/model_executor/layers/linear.py index 9606662f7fea5..1b8171ab3ea41 100644 --- a/vllm/model_executor/layers/linear.py +++ b/vllm/model_executor/layers/linear.py @@ -253,8 +253,9 @@ def weight_loader(self, param: Parameter, loaded_weight: torch.Tensor): shard_size) # Special case for Fp8 scales. elif shard_indexer is not None: - param_data, loaded_weight = shard_indexer( - param_data, loaded_weight, shard_id=0) + param_data, loaded_weight = shard_indexer(param_data, + loaded_weight, + shard_id=0) assert param_data.shape == loaded_weight.shape param_data.copy_(loaded_weight) @@ -381,8 +382,9 @@ def weight_loader(self, param_data = param_data.narrow(0, shard_offset, shard_size) # Special case sharding for Fp8 scales. elif shard_indexer is not None: - param_data, loaded_weight = shard_indexer( - param_data, loaded_weight, loaded_shard_id) + param_data, loaded_weight = shard_indexer(param_data, + loaded_weight, + loaded_shard_id) else: ignore_warning = getattr(param, "ignore_warning", False) @@ -545,8 +547,9 @@ def weight_loader(self, # Special case for for Fp8 scales. elif shard_indexer is not None: - param_data, loaded_weight = shard_indexer( - param_data, loaded_weight, loaded_shard_id) + param_data, loaded_weight = shard_indexer(param_data, + loaded_weight, + loaded_shard_id) else: ignore_warning = getattr(param, "ignore_warning", False) if not ignore_warning: @@ -640,8 +643,9 @@ def weight_loader(self, param: Parameter, loaded_weight: torch.Tensor): shard_size) # Special case for Fp8 scales. elif shard_indexer is not None: - param_data, loaded_weight = shard_indexer( - param_data, loaded_weight, shard_id=0) + param_data, loaded_weight = shard_indexer(param_data, + loaded_weight, + shard_id=0) assert param_data.shape == loaded_weight.shape param_data.copy_(loaded_weight) diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py index ba83719c73624..d651216b14fb0 100644 --- a/vllm/model_executor/layers/quantization/fp8.py +++ b/vllm/model_executor/layers/quantization/fp8.py @@ -160,8 +160,8 @@ def process_weights_after_loading(self, layer: Module) -> None: # only linear layers invoke "create_weights". So we check # whether "weight_scale" is registered to determine # whether the layer is a linear layer that requires quantization. - if (not hasattr(layer, "process_after_load") or - not layer.process_after_load): + if (not hasattr(layer, "process_after_load") + or not layer.process_after_load): return # If checkpoint is fp1616 (not serialized fp8), quantize the weights. 
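With run_fp8.py deleted a couple of patches above, the quickest way to exercise an fp8 checkpoint end to end is the same flow the script used, condensed here for reference (the model name is one of the test checkpoints already referenced in this series):

from transformers import AutoTokenizer

from vllm import LLM

model_name = "nm-testing/mistral-fp8-static"
model = LLM(model_name,
            enforce_eager=True,
            max_model_len=1024,
            quantization="fp8")

tokenizer = AutoTokenizer.from_pretrained(model_name)
prompt = tokenizer.apply_chat_template(
    [{"role": "user", "content": "What is open source software?"}],
    tokenize=False,
    add_generation_prompt=True)

outputs = model.generate(prompt)
print(outputs[0].outputs[0].text)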
From 169c9edf091efd6de980aa39aee886d320a8ccab Mon Sep 17 00:00:00 2001 From: mgoin Date: Sat, 27 Apr 2024 11:17:41 -0400 Subject: [PATCH 72/90] Cleanup opt.py --- vllm/model_executor/models/opt.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/vllm/model_executor/models/opt.py b/vllm/model_executor/models/opt.py index 133ee79260620..336f765ababaa 100644 --- a/vllm/model_executor/models/opt.py +++ b/vllm/model_executor/models/opt.py @@ -130,7 +130,6 @@ def __init__( bias=config.enable_bias, quant_config=quant_config, ) - # quant_config = getattr(quant_config, "quant_config", None) self.activation_fn = get_act_fn(config.activation_function, quant_config, config.ffn_dim) self.fc2 = RowParallelLinear( @@ -323,7 +322,6 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): ("qkv_proj", "v_proj", "v"), ] params_dict = dict(self.named_parameters(remove_duplicate=False)) - print(params_dict.keys()) for name, loaded_weight in weights: if "lm_head.weight" in name: continue From 8ef9c7d1272bb519d3efa4626eb8fcd82de16305 Mon Sep 17 00:00:00 2001 From: Robert Shaw Date: Sat, 27 Apr 2024 16:43:47 +0000 Subject: [PATCH 73/90] added testing --- tests/models/test_fp8.py | 66 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 tests/models/test_fp8.py diff --git a/tests/models/test_fp8.py b/tests/models/test_fp8.py new file mode 100644 index 0000000000000..c8e8dade2d06e --- /dev/null +++ b/tests/models/test_fp8.py @@ -0,0 +1,66 @@ +"""Compares the outputs of gptq vs gptq_marlin +""" +import os + +import pytest +import torch +from transformers import AutoTokenizer +from vllm import LLM, SamplingParams +from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS + +os.environ["TOKENIZERS_PARALLELISM"] = "true" + +MAX_MODEL_LEN = 1024 + +MODELS = [ + "nm-testing/mistral-fp8-static", + "nm-testing/mistral-fp8-dynamic", + "mistralai/Mistral-7B-Instruct-v0.2", +] + +EXPECTED_STRS_MAP = { + "nm-testing/mistral-fp8-static" : [' VLLM (Vulcan Learning Machine) is a high-performance and memory-efficient', ' 1. 1950s: The Concept of AI is Born: The term', ' Artificial Intelligence (AI) and Human Intelligence (HI) are two distinct ways of processing information.', " A neural network is a type of artificial intelligence model inspired by the human brain's structure and function", ' In the heart of a sprawling industrial city, nestled among the hum of machinery and the rhythm', ' The COVID-19 pandemic has had a profound impact on global economic structures and has forced businesses to', ' The Mona Lisa painting, created by the Italian artist Leonardo da Vinci between 15', ' Japanese: 早く起きる'], # noqa: E501 + "nm-testing/mistral-fp8-dynamic": [' VLLM (Vulcan Learning Machine) is a high-performance and memory-efficient', ' 1. 1950s: The Concept of AI is Born: The term', ' Artificial Intelligence (AI) and Human Intelligence (HI) are two distinct ways of processing information.', " A neural network is a type of artificial intelligence model inspired by the human brain's structure and function", ' In the heart of the bustling city of Neo-Tokyo, nestled among the tow', ' The COVID-19 pandemic has had a profound impact on global economic structures and has forced businesses to', ' The Mona Lisa painting, created by the Italian artist Leonardo da Vinci between 15', ' Japanese: 早く起きる鳥は虫を取る (S'], # noqa: E501 + "mistralai/Mistral-7B-Instruct-v0.2": [' VLLM (Vulcan Learning Machine) is a high-performance and memory-efficient', ' 1. 
1950s: The Concept of AI is Born: The term', ' Artificial Intelligence (AI) and Human Intelligence (HI) are two distinct ways of processing information.', " A neural network is a type of machine learning model inspired by the human brain's structure and function", ' In the heart of the bustling city of Neo-Tokyo, nestled among the tow', ' The COVID-19 pandemic has had a profound impact on global economic structures and has forced businesses to', ' The Mona Lisa painting, created by the Italian artist Leonardo da Vinci between 15', ' Japanese: 早く起きる鳥は虫を取る (S'], # noqa: E501 +} + +capability = torch.cuda.get_device_capability() +capability = capability[0] * 10 + capability[1] +fp8_not_supported = ( + capability < QUANTIZATION_METHODS["fp8"].get_min_capability()) + +@pytest.mark.skipif(fp8_not_supported, + reason="fp8 is not supported on this GPU type.") +@pytest.mark.parametrize("model_name", MODELS) +def test_models( + example_prompts, + model_name, +) -> None: + model = LLM( + model=model_name, + max_model_len=MAX_MODEL_LEN, + enforce_eager=True, + quantization="fp8") + + tokenizer = AutoTokenizer.from_pretrained(model_name) + formatted_prompts = [ + tokenizer.apply_chat_template( + [{ "role": "user", "content": prompt }], + tokenize=False, add_generation_prompt=True + ) for prompt in example_prompts ] + + params = SamplingParams(max_tokens=20, temperature=0) + generations = [] + # Note: these need to be run 1 at a time due to numerical precision, + # since the expected strs were generated this way. + for prompt in formatted_prompts: + outputs = model.generate(prompt, params) + generations.append(outputs[0].outputs[0].text) + del model + + expected_strs = EXPECTED_STRS_MAP[model_name] + for i in range(len(example_prompts)): + generated_str = generations[i] + expected_str = expected_strs[i] + assert expected_str == generated_str, ( + f"Test{i}:\nExpected: {expected_str!r}\nvLLM: {generated_str!r}") \ No newline at end of file From c7d6dd665b5f5aab20b7d03bb627632783576d83 Mon Sep 17 00:00:00 2001 From: Robert Shaw Date: Sat, 27 Apr 2024 16:45:27 +0000 Subject: [PATCH 74/90] ./format.sh --- tests/models/test_fp8.py | 67 ++++++++++++++++++++++++++++++---------- 1 file changed, 50 insertions(+), 17 deletions(-) diff --git a/tests/models/test_fp8.py b/tests/models/test_fp8.py index c8e8dade2d06e..14abe8576f6ff 100644 --- a/tests/models/test_fp8.py +++ b/tests/models/test_fp8.py @@ -1,10 +1,12 @@ -"""Compares the outputs of gptq vs gptq_marlin +# flake8: noqa +"""Tests fp8 models against ground truth generation """ import os import pytest import torch from transformers import AutoTokenizer + from vllm import LLM, SamplingParams from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS @@ -19,15 +21,43 @@ ] EXPECTED_STRS_MAP = { - "nm-testing/mistral-fp8-static" : [' VLLM (Vulcan Learning Machine) is a high-performance and memory-efficient', ' 1. 
1950s: The Concept of AI is Born: The term', ' Artificial Intelligence (AI) and Human Intelligence (HI) are two distinct ways of processing information.', " A neural network is a type of artificial intelligence model inspired by the human brain's structure and function", ' In the heart of a sprawling industrial city, nestled among the hum of machinery and the rhythm', ' The COVID-19 pandemic has had a profound impact on global economic structures and has forced businesses to', ' The Mona Lisa painting, created by the Italian artist Leonardo da Vinci between 15', ' Japanese: 早く起きる'], # noqa: E501 - "nm-testing/mistral-fp8-dynamic": [' VLLM (Vulcan Learning Machine) is a high-performance and memory-efficient', ' 1. 1950s: The Concept of AI is Born: The term', ' Artificial Intelligence (AI) and Human Intelligence (HI) are two distinct ways of processing information.', " A neural network is a type of artificial intelligence model inspired by the human brain's structure and function", ' In the heart of the bustling city of Neo-Tokyo, nestled among the tow', ' The COVID-19 pandemic has had a profound impact on global economic structures and has forced businesses to', ' The Mona Lisa painting, created by the Italian artist Leonardo da Vinci between 15', ' Japanese: 早く起きる鳥は虫を取る (S'], # noqa: E501 - "mistralai/Mistral-7B-Instruct-v0.2": [' VLLM (Vulcan Learning Machine) is a high-performance and memory-efficient', ' 1. 1950s: The Concept of AI is Born: The term', ' Artificial Intelligence (AI) and Human Intelligence (HI) are two distinct ways of processing information.', " A neural network is a type of machine learning model inspired by the human brain's structure and function", ' In the heart of the bustling city of Neo-Tokyo, nestled among the tow', ' The COVID-19 pandemic has had a profound impact on global economic structures and has forced businesses to', ' The Mona Lisa painting, created by the Italian artist Leonardo da Vinci between 15', ' Japanese: 早く起きる鳥は虫を取る (S'], # noqa: E501 + "nm-testing/mistral-fp8-static": [ + ' VLLM (Vulcan Learning Machine) is a high-performance and memory-efficient', + ' 1. 1950s: The Concept of AI is Born: The term', + ' Artificial Intelligence (AI) and Human Intelligence (HI) are two distinct ways of processing information.', + " A neural network is a type of artificial intelligence model inspired by the human brain's structure and function", + ' In the heart of a sprawling industrial city, nestled among the hum of machinery and the rhythm', + ' The COVID-19 pandemic has had a profound impact on global economic structures and has forced businesses to', + ' The Mona Lisa painting, created by the Italian artist Leonardo da Vinci between 15', + ' Japanese: 早く起きる' + ], + "nm-testing/mistral-fp8-dynamic": [ + ' VLLM (Vulcan Learning Machine) is a high-performance and memory-efficient', + ' 1. 
1950s: The Concept of AI is Born: The term', + ' Artificial Intelligence (AI) and Human Intelligence (HI) are two distinct ways of processing information.', + " A neural network is a type of artificial intelligence model inspired by the human brain's structure and function", + ' In the heart of the bustling city of Neo-Tokyo, nestled among the tow', + ' The COVID-19 pandemic has had a profound impact on global economic structures and has forced businesses to', + ' The Mona Lisa painting, created by the Italian artist Leonardo da Vinci between 15', + ' Japanese: 早く起きる鳥は虫を取る (S' + ], + "mistralai/Mistral-7B-Instruct-v0.2": [ + ' VLLM (Vulcan Learning Machine) is a high-performance and memory-efficient', + ' 1. 1950s: The Concept of AI is Born: The term', + ' Artificial Intelligence (AI) and Human Intelligence (HI) are two distinct ways of processing information.', + " A neural network is a type of machine learning model inspired by the human brain's structure and function", + ' In the heart of the bustling city of Neo-Tokyo, nestled among the tow', + ' The COVID-19 pandemic has had a profound impact on global economic structures and has forced businesses to', + ' The Mona Lisa painting, created by the Italian artist Leonardo da Vinci between 15', + ' Japanese: 早く起きる鳥は虫を取る (S' + ], } capability = torch.cuda.get_device_capability() capability = capability[0] * 10 + capability[1] -fp8_not_supported = ( - capability < QUANTIZATION_METHODS["fp8"].get_min_capability()) +fp8_not_supported = (capability < + QUANTIZATION_METHODS["fp8"].get_min_capability()) + @pytest.mark.skipif(fp8_not_supported, reason="fp8 is not supported on this GPU type.") @@ -36,18 +66,21 @@ def test_models( example_prompts, model_name, ) -> None: - model = LLM( - model=model_name, - max_model_len=MAX_MODEL_LEN, - enforce_eager=True, - quantization="fp8") + model = LLM(model=model_name, + max_model_len=MAX_MODEL_LEN, + enforce_eager=True, + quantization="fp8") tokenizer = AutoTokenizer.from_pretrained(model_name) formatted_prompts = [ - tokenizer.apply_chat_template( - [{ "role": "user", "content": prompt }], - tokenize=False, add_generation_prompt=True - ) for prompt in example_prompts ] + tokenizer.apply_chat_template([{ + "role": "user", + "content": prompt + }], + tokenize=False, + add_generation_prompt=True) + for prompt in example_prompts + ] params = SamplingParams(max_tokens=20, temperature=0) generations = [] @@ -57,10 +90,10 @@ def test_models( outputs = model.generate(prompt, params) generations.append(outputs[0].outputs[0].text) del model - + expected_strs = EXPECTED_STRS_MAP[model_name] for i in range(len(example_prompts)): generated_str = generations[i] expected_str = expected_strs[i] assert expected_str == generated_str, ( - f"Test{i}:\nExpected: {expected_str!r}\nvLLM: {generated_str!r}") \ No newline at end of file + f"Test{i}:\nExpected: {expected_str!r}\nvLLM: {generated_str!r}") From 50b58238d883ab7c99b0022a99410dc1a681671c Mon Sep 17 00:00:00 2001 From: Robert Shaw Date: Sat, 27 Apr 2024 16:47:40 +0000 Subject: [PATCH 75/90] fixed typing --- vllm/model_executor/layers/quantization/fp8.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py index d651216b14fb0..a0479e4e034b3 100644 --- a/vllm/model_executor/layers/quantization/fp8.py +++ b/vllm/model_executor/layers/quantization/fp8.py @@ -5,9 +5,8 @@ from torch.nn.parameter import Parameter from vllm import _custom_ops as ops -from 
vllm.model_executor.layers.linear import LinearBase -from vllm.model_executor.layers.quantization.base_config import ( - QuantizationConfig, QuantizeMethodBase) +from vllm.model_executor.layers.linear import LinearBase, LinearMethodBase +from vllm.model_executor.layers.quantization.base_config import QuantizationConfig from vllm.model_executor.utils import set_weight_attrs ACTIVATION_SCHEMES = ["static", "dynamic"] @@ -61,7 +60,7 @@ def get_scaled_act_names(self) -> List[str]: return [] -class Fp8LinearMethod(QuantizeMethodBase): +class Fp8LinearMethod(LinearMethodBase): """Linear method for FP8. Supports loading FP8 checkpoints with static weight scale and dynamic/scale activation scale. From 4156ca9bca702a3b03f884fa9e067a1d5005377b Mon Sep 17 00:00:00 2001 From: Robert Shaw Date: Sat, 27 Apr 2024 16:48:40 +0000 Subject: [PATCH 76/90] fixed typing --- vllm/model_executor/layers/quantization/fp8.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py index a0479e4e034b3..ff1a8cf601be9 100644 --- a/vllm/model_executor/layers/quantization/fp8.py +++ b/vllm/model_executor/layers/quantization/fp8.py @@ -155,10 +155,6 @@ def scales_shard_indexer( return param[shard_id], loaded_weight def process_weights_after_loading(self, layer: Module) -> None: - # Although the quant_method is propagated to all layers, - # only linear layers invoke "create_weights". So we check - # whether "weight_scale" is registered to determine - # whether the layer is a linear layer that requires quantization. if (not hasattr(layer, "process_after_load") or not layer.process_after_load): return From 3148fc96c797e76ee7e23b209b6bfb60d15aab10 Mon Sep 17 00:00:00 2001 From: Robert Shaw Date: Sat, 27 Apr 2024 17:03:00 +0000 Subject: [PATCH 77/90] added warning format --- vllm/model_executor/layers/quantization/fp8.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py index ff1a8cf601be9..79c68f7b8c516 100644 --- a/vllm/model_executor/layers/quantization/fp8.py +++ b/vllm/model_executor/layers/quantization/fp8.py @@ -5,12 +5,16 @@ from torch.nn.parameter import Parameter from vllm import _custom_ops as ops +from vllm.logger import init_logger from vllm.model_executor.layers.linear import LinearBase, LinearMethodBase -from vllm.model_executor.layers.quantization.base_config import QuantizationConfig +from vllm.model_executor.layers.quantization.base_config import ( + QuantizationConfig) from vllm.model_executor.utils import set_weight_attrs ACTIVATION_SCHEMES = ["static", "dynamic"] +logger = init_logger(__name__) + class Fp8Config(QuantizationConfig): """Config class for FP8.""" @@ -21,6 +25,10 @@ def __init__( activation_scheme: str = "dynamic", ) -> None: self.is_serialized = is_serialized + if is_serialized: + logger.warning( + "Detected fp8 checkpoint. 
Please note that the " + "format is experimental and subject to change.") assert activation_scheme in ACTIVATION_SCHEMES self.activation_scheme = activation_scheme From 7846d6731c8b56023fe298888a60b9357960b6e2 Mon Sep 17 00:00:00 2001 From: Robert Shaw <114415538+robertgshaw2-neuralmagic@users.noreply.github.com> Date: Sat, 27 Apr 2024 13:03:41 -0400 Subject: [PATCH 78/90] Update opt.py re-added opt change, will fix in another PR --- vllm/model_executor/models/opt.py | 1 + 1 file changed, 1 insertion(+) diff --git a/vllm/model_executor/models/opt.py b/vllm/model_executor/models/opt.py index 336f765ababaa..838a2f0adc4d1 100644 --- a/vllm/model_executor/models/opt.py +++ b/vllm/model_executor/models/opt.py @@ -130,6 +130,7 @@ def __init__( bias=config.enable_bias, quant_config=quant_config, ) + quant_config = getattr(quant_config, "quant_config", None) self.activation_fn = get_act_fn(config.activation_function, quant_config, config.ffn_dim) self.fc2 = RowParallelLinear( From ba408c668bafb8bc8b30afd9963b0abcb707620e Mon Sep 17 00:00:00 2001 From: Robert Shaw Date: Sat, 27 Apr 2024 17:05:29 +0000 Subject: [PATCH 79/90] formatted --- vllm/model_executor/layers/quantization/fp8.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py index 79c68f7b8c516..ca58cacfa722d 100644 --- a/vllm/model_executor/layers/quantization/fp8.py +++ b/vllm/model_executor/layers/quantization/fp8.py @@ -26,9 +26,8 @@ def __init__( ) -> None: self.is_serialized = is_serialized if is_serialized: - logger.warning( - "Detected fp8 checkpoint. Please note that the " - "format is experimental and subject to change.") + logger.warning("Detected fp8 checkpoint. Please note that the " + "format is experimental and subject to change.") assert activation_scheme in ACTIVATION_SCHEMES self.activation_scheme = activation_scheme From 04617fd7f6035d1961e9fabbbaea1965a068dc80 Mon Sep 17 00:00:00 2001 From: Robert Shaw <114415538+robertgshaw2-neuralmagic@users.noreply.github.com> Date: Sat, 27 Apr 2024 18:44:52 -0400 Subject: [PATCH 80/90] Update vllm/model_executor/layers/quantization/fp8.py Co-authored-by: Michael Goin --- vllm/model_executor/layers/quantization/fp8.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py index ca58cacfa722d..4f9e213899506 100644 --- a/vllm/model_executor/layers/quantization/fp8.py +++ b/vllm/model_executor/layers/quantization/fp8.py @@ -70,7 +70,7 @@ def get_scaled_act_names(self) -> List[str]: class Fp8LinearMethod(LinearMethodBase): """Linear method for FP8. Supports loading FP8 checkpoints with static weight scale and - dynamic/scale activation scale. + dynamic/static activation scale. Also supports loading quantized FP16/BF16 model checkpoints with dynamic activation scaling. 
The weight scaling factor will be initialized after From cc3d3955c88c06fb3d6f42e20184b785b5e505de Mon Sep 17 00:00:00 2001 From: Robert Shaw <114415538+robertgshaw2-neuralmagic@users.noreply.github.com> Date: Sat, 27 Apr 2024 18:46:45 -0400 Subject: [PATCH 81/90] Update vllm/model_executor/layers/quantization/fp8.py Co-authored-by: Michael Goin --- vllm/model_executor/layers/quantization/fp8.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py index 4f9e213899506..086c34b9689da 100644 --- a/vllm/model_executor/layers/quantization/fp8.py +++ b/vllm/model_executor/layers/quantization/fp8.py @@ -228,7 +228,7 @@ def apply(self, ) # TODO: replace naive loop with cutlass gemm_dq w/ epilogue fusion. - # Case 2: We have N weigth_scales for N logical weights. + # Case 2: We have N weight_scales for N logical weights. else: output = torch.empty(x.shape[0], layer.weight.shape[1], From f556016cf7391072627fd315f6d82f4ab6bc1db5 Mon Sep 17 00:00:00 2001 From: Robert Shaw <114415538+robertgshaw2-neuralmagic@users.noreply.github.com> Date: Sun, 28 Apr 2024 13:24:17 -0400 Subject: [PATCH 82/90] auto detect shared scale (#214) --- vllm/model_executor/layers/quantization/fp8.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py index 086c34b9689da..e5f5a78696cff 100644 --- a/vllm/model_executor/layers/quantization/fp8.py +++ b/vllm/model_executor/layers/quantization/fp8.py @@ -166,7 +166,7 @@ def process_weights_after_loading(self, layer: Module) -> None: or not layer.process_after_load): return - # If checkpoint is fp1616 (not serialized fp8), quantize the weights. + # If checkpoint is fp/bf16 (not serialized fp8), quantize the weights. if not self.quant_config.is_serialized: qweight, weight_scale = ops.scaled_fp8_quant(layer.weight, scale=None) @@ -186,8 +186,8 @@ def process_weights_after_loading(self, layer: Module) -> None: layer.weight = Parameter(weight.t(), requires_grad=False) # WEIGHT_SCALE - # If we only have one logical shard, avoid the loop in apply(). - if len(layer.logical_widths) == 1: + # If all weight_scales are equal, use a single scale to avoid naive loop. + if all_close_1d(layer.weight_scale): layer.weight_scale = Parameter(layer.weight_scale.max(), requires_grad=False) layer.logical_widths = None @@ -217,7 +217,7 @@ def apply(self, # If static, layer.act_scale is scalar and x_scale set to act_scale. qinput, x_scale = ops.scaled_fp8_quant(x, layer.act_scale) - # Case 1: we have one single scale for N logical weights. + # Case 1: we have 1 weight_scale for N logical weights. 
if layer.logical_widths is None: output, _ = torch._scaled_mm( qinput, From 30bfbd81e1e907b58dadf6c56fa525d50ab4f07e Mon Sep 17 00:00:00 2001 From: Robert Shaw Date: Sun, 28 Apr 2024 18:17:18 +0000 Subject: [PATCH 83/90] ./format.sh --- vllm/model_executor/layers/quantization/fp8.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py index e5f5a78696cff..554e8fe1546e2 100644 --- a/vllm/model_executor/layers/quantization/fp8.py +++ b/vllm/model_executor/layers/quantization/fp8.py @@ -186,7 +186,7 @@ def process_weights_after_loading(self, layer: Module) -> None: layer.weight = Parameter(weight.t(), requires_grad=False) # WEIGHT_SCALE - # If all weight_scales are equal, use a single scale to avoid naive loop. + # If all weight_scales are equal, use a single scale. if all_close_1d(layer.weight_scale): layer.weight_scale = Parameter(layer.weight_scale.max(), requires_grad=False) From 572107ab92f3b5dce613ce1d69ecf8fd910f6165 Mon Sep 17 00:00:00 2001 From: Robert Shaw <114415538+robertgshaw2-neuralmagic@users.noreply.github.com> Date: Sun, 28 Apr 2024 21:07:18 -0400 Subject: [PATCH 84/90] Update vllm/model_executor/layers/quantization/fp8.py Co-authored-by: Cody Yu --- vllm/model_executor/layers/quantization/fp8.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py index 554e8fe1546e2..eda17ab316322 100644 --- a/vllm/model_executor/layers/quantization/fp8.py +++ b/vllm/model_executor/layers/quantization/fp8.py @@ -28,7 +28,8 @@ def __init__( if is_serialized: logger.warning("Detected fp8 checkpoint. Please note that the " "format is experimental and subject to change.") - assert activation_scheme in ACTIVATION_SCHEMES + if activation_scheme not in ACTIVATION_SCHEMES: + raise ValueError(f"Unsupported activation scheme {activation_scheme}") self.activation_scheme = activation_scheme @classmethod From 41fbde92035f7cab6369f09081132da40d0c9bf5 Mon Sep 17 00:00:00 2001 From: Robert Shaw Date: Mon, 29 Apr 2024 02:08:59 +0000 Subject: [PATCH 85/90] ./format.sh --- vllm/model_executor/layers/quantization/fp8.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py index eda17ab316322..cdcbab1a515f3 100644 --- a/vllm/model_executor/layers/quantization/fp8.py +++ b/vllm/model_executor/layers/quantization/fp8.py @@ -29,7 +29,8 @@ def __init__( logger.warning("Detected fp8 checkpoint. 
Please note that the " "format is experimental and subject to change.") if activation_scheme not in ACTIVATION_SCHEMES: - raise ValueError(f"Unsupported activation scheme {activation_scheme}") + raise ValueError( + f"Unsupported activation scheme {activation_scheme}") self.activation_scheme = activation_scheme @classmethod From f2cd5617e0c8144c75f8039f677910296dc77416 Mon Sep 17 00:00:00 2001 From: Robert Shaw Date: Mon, 29 Apr 2024 02:19:46 +0000 Subject: [PATCH 86/90] addressed cody's comments + format --- vllm/model_executor/layers/linear.py | 47 ++++++++++--------- .../model_executor/layers/quantization/fp8.py | 30 ++++++------ 2 files changed, 40 insertions(+), 37 deletions(-) diff --git a/vllm/model_executor/layers/linear.py b/vllm/model_executor/layers/linear.py index 1b8171ab3ea41..6a732f30bfab9 100644 --- a/vllm/model_executor/layers/linear.py +++ b/vllm/model_executor/layers/linear.py @@ -241,7 +241,8 @@ def __init__( def weight_loader(self, param: Parameter, loaded_weight: torch.Tensor): # Special case for Fp8 scales. - shard_indexer = getattr(param, "shard_indexer", None) + fp8_scales_shard_indexer = getattr(param, "fp8_scales_shard_indexer", + None) tp_rank = get_tensor_model_parallel_rank() output_dim = getattr(param, "output_dim", None) @@ -252,10 +253,10 @@ def weight_loader(self, param: Parameter, loaded_weight: torch.Tensor): loaded_weight = loaded_weight.narrow(output_dim, start_idx, shard_size) # Special case for Fp8 scales. - elif shard_indexer is not None: - param_data, loaded_weight = shard_indexer(param_data, - loaded_weight, - shard_id=0) + elif fp8_scales_shard_indexer is not None: + param_data, loaded_weight = fp8_scales_shard_indexer(param_data, + loaded_weight, + shard_id=0) assert param_data.shape == loaded_weight.shape param_data.copy_(loaded_weight) @@ -322,7 +323,8 @@ def weight_loader(self, # Special case for AQLM codebooks. is_metadata = getattr(param, "is_metadata", False) # Special case for Fp8 scales. - shard_indexer = getattr(param, "shard_indexer", None) + fp8_scales_shard_indexer = getattr(param, "fp8_scales_shard_indexer", + None) if loaded_shard_id is None: # Loaded weight is already packed. @@ -380,11 +382,10 @@ def weight_loader(self, shard_size = loaded_weight.shape[0] shard_offset = loaded_shard_id * shard_size param_data = param_data.narrow(0, shard_offset, shard_size) - # Special case sharding for Fp8 scales. - elif shard_indexer is not None: - param_data, loaded_weight = shard_indexer(param_data, - loaded_weight, - loaded_shard_id) + # Special case for Fp8 scales. + elif fp8_scales_shard_indexer is not None: + param_data, loaded_weight = fp8_scales_shard_indexer( + param_data, loaded_weight, loaded_shard_id) else: ignore_warning = getattr(param, "ignore_warning", False) @@ -469,7 +470,8 @@ def weight_loader(self, # Special case for AQLM codebooks. is_metadata = getattr(param, "is_metadata", False) # Special case for Fp8 scales. - shard_indexer = getattr(param, "shard_indexer", None) + fp8_scales_shard_indexer = getattr(param, "fp8_scales_shard_indexer", + None) if loaded_shard_id is None: # Loaded weight is already packed. @@ -544,12 +546,10 @@ def weight_loader(self, shard_index = ["q", "k", "v"].index(loaded_shard_id) param_data = param_data.narrow(0, shard_index * shard_size, shard_size) - # Special case for for Fp8 scales. - elif shard_indexer is not None: - - param_data, loaded_weight = shard_indexer(param_data, - loaded_weight, - loaded_shard_id) + # Special case for Fp8 scales. 
+ elif fp8_scales_shard_indexer is not None: + param_data, loaded_weight = fp8_scales_shard_indexer( + param_data, loaded_weight, loaded_shard_id) else: ignore_warning = getattr(param, "ignore_warning", False) if not ignore_warning: @@ -631,7 +631,8 @@ def __init__( def weight_loader(self, param: Parameter, loaded_weight: torch.Tensor): # Special case for Fp8 scales. - shard_indexer = getattr(param, "shard_indexer", None) + fp8_scales_shard_indexer = getattr(param, "fp8_scales_shard_indexer", + None) tp_rank = get_tensor_model_parallel_rank() input_dim = getattr(param, "input_dim", None) @@ -642,10 +643,10 @@ def weight_loader(self, param: Parameter, loaded_weight: torch.Tensor): loaded_weight = loaded_weight.narrow(input_dim, start_idx, shard_size) # Special case for Fp8 scales. - elif shard_indexer is not None: - param_data, loaded_weight = shard_indexer(param_data, - loaded_weight, - shard_id=0) + elif fp8_scales_shard_indexer is not None: + param_data, loaded_weight = fp8_scales_shard_indexer(param_data, + loaded_weight, + shard_id=0) assert param_data.shape == loaded_weight.shape param_data.copy_(loaded_weight) diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py index cdcbab1a515f3..c16fd3acea004 100644 --- a/vllm/model_executor/layers/quantization/fp8.py +++ b/vllm/model_executor/layers/quantization/fp8.py @@ -21,11 +21,11 @@ class Fp8Config(QuantizationConfig): def __init__( self, - is_serialized: bool = False, + is_checkpoint_fp8_serialized: bool = False, activation_scheme: str = "dynamic", ) -> None: - self.is_serialized = is_serialized - if is_serialized: + self.is_checkpoint_fp8_serialized = is_checkpoint_fp8_serialized + if is_checkpoint_fp8_serialized: logger.warning("Detected fp8 checkpoint. Please note that the " "format is experimental and subject to change.") if activation_scheme not in ACTIVATION_SCHEMES: @@ -55,9 +55,9 @@ def get_config_filenames(cls) -> List[str]: @classmethod def from_config(cls, config: Dict[str, Any]) -> "Fp8Config": quant_method = cls.get_from_keys(config, ["quant_method"]) - is_serialized = ("fp8" in quant_method) + is_checkpoint_fp8_serialized = ("fp8" in quant_method) activation_scheme = cls.get_from_keys(config, ["activation_scheme"]) - return cls(is_serialized=is_serialized, + return cls(is_checkpoint_fp8_serialized=is_checkpoint_fp8_serialized, activation_scheme=activation_scheme) def get_quant_method(self, layer: torch.nn.Module) -> "Fp8LinearMethod": @@ -108,7 +108,8 @@ def create_weights( # WEIGHT weight_dtype = (torch.float8_e4m3fn - if self.quant_config.is_serialized else params_dtype) + if self.quant_config.is_checkpoint_fp8_serialized else + params_dtype) weight = Parameter(torch.empty(output_size_per_partition, input_size_per_partition, dtype=weight_dtype), @@ -122,17 +123,18 @@ def create_weights( # If checkpoint is serialized fp8, load them. # Otherwise, wait until process_weights_after_loading. 
- if self.quant_config.is_serialized: + if self.quant_config.is_checkpoint_fp8_serialized: # WEIGHT SCALE weight_scale = Parameter(torch.empty(len(output_partition_sizes), dtype=torch.float32), requires_grad=False) layer.register_parameter("weight_scale", weight_scale) - set_weight_attrs(weight_scale, { - **extra_weight_attrs, - "shard_indexer": - self.scales_shard_indexer, - }) + set_weight_attrs( + weight_scale, { + **extra_weight_attrs, + "fp8_scales_shard_indexer": + self.scales_shard_indexer, + }) # ACTIVATION SCALE if self.quant_config.activation_scheme == "static": @@ -143,7 +145,7 @@ def create_weights( set_weight_attrs( act_scale, { **extra_weight_attrs, - "shard_indexer": + "fp8_scales_shard_indexer": self.scales_shard_indexer, }) @@ -169,7 +171,7 @@ def process_weights_after_loading(self, layer: Module) -> None: return # If checkpoint is fp/bf16 (not serialized fp8), quantize the weights. - if not self.quant_config.is_serialized: + if not self.quant_config.is_checkpoint_fp8_serialized: qweight, weight_scale = ops.scaled_fp8_quant(layer.weight, scale=None) layer.weight = Parameter(qweight.t(), requires_grad=False) From 125266e10a5a17a5079a839fc64b6c0b883c7cf9 Mon Sep 17 00:00:00 2001 From: Robert Shaw Date: Mon, 29 Apr 2024 02:47:16 +0000 Subject: [PATCH 87/90] make mypy happy --- vllm/model_executor/layers/quantization/fp8.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py index c16fd3acea004..5952f382f8e8f 100644 --- a/vllm/model_executor/layers/quantization/fp8.py +++ b/vllm/model_executor/layers/quantization/fp8.py @@ -60,7 +60,8 @@ def from_config(cls, config: Dict[str, Any]) -> "Fp8Config": return cls(is_checkpoint_fp8_serialized=is_checkpoint_fp8_serialized, activation_scheme=activation_scheme) - def get_quant_method(self, layer: torch.nn.Module) -> "Fp8LinearMethod": + def get_quant_method( + self, layer: torch.nn.Module) -> Optional["Fp8LinearMethod"]: if isinstance(layer, LinearBase): return Fp8LinearMethod(self) return None From 280a4d5e5dc29d5f8fc9b52f92227d835f76d3b8 Mon Sep 17 00:00:00 2001 From: Robert Shaw Date: Tue, 30 Apr 2024 01:49:47 +0000 Subject: [PATCH 88/90] test --- tests/models/test_fp8.py | 16 +++--- .../model_executor/layers/quantization/fp8.py | 53 ++++++++++--------- 2 files changed, 38 insertions(+), 31 deletions(-) diff --git a/tests/models/test_fp8.py b/tests/models/test_fp8.py index 14abe8576f6ff..2ed08afc270a4 100644 --- a/tests/models/test_fp8.py +++ b/tests/models/test_fp8.py @@ -1,5 +1,6 @@ # flake8: noqa """Tests fp8 models against ground truth generation +Note: these tests will only pass on L4 GPU. """ import os @@ -22,27 +23,27 @@ EXPECTED_STRS_MAP = { "nm-testing/mistral-fp8-static": [ - ' VLLM (Vulcan Learning Machine) is a high-performance and memory-efficient', + ' VLLM (Vulcan Language Model) is an open-source inference and serving engine', ' 1. 
1950s: The Concept of AI is Born: The term', ' Artificial Intelligence (AI) and Human Intelligence (HI) are two distinct ways of processing information.', - " A neural network is a type of artificial intelligence model inspired by the human brain's structure and function", - ' In the heart of a sprawling industrial city, nestled among the hum of machinery and the rhythm', + ' A neural network is a type of machine learning model inspired by the structure and function of the human brain', + ' In the heart of the bustling city of Neo-Tokyo, nestled among the tow', ' The COVID-19 pandemic has had a profound impact on global economic structures and has forced businesses to', ' The Mona Lisa painting, created by the Italian artist Leonardo da Vinci between 15', ' Japanese: 早く起きる' ], "nm-testing/mistral-fp8-dynamic": [ - ' VLLM (Vulcan Learning Machine) is a high-performance and memory-efficient', + ' VLLM (Vulcan Language Model) is an open-source, high-throughput', ' 1. 1950s: The Concept of AI is Born: The term', ' Artificial Intelligence (AI) and Human Intelligence (HI) are two distinct ways of processing information.', - " A neural network is a type of artificial intelligence model inspired by the human brain's structure and function", - ' In the heart of the bustling city of Neo-Tokyo, nestled among the tow', + " A neural network is a type of machine learning model inspired by the human brain's structure and function", + ' Once upon a time, in the heart of a bustling city, there was a robot named B', ' The COVID-19 pandemic has had a profound impact on global economic structures and has forced businesses to', ' The Mona Lisa painting, created by the Italian artist Leonardo da Vinci between 15', ' Japanese: 早く起きる鳥は虫を取る (S' ], "mistralai/Mistral-7B-Instruct-v0.2": [ - ' VLLM (Vulcan Learning Machine) is a high-performance and memory-efficient', + ' VLLM (Vulcan Language Model) is an open-source, high-throughput', ' 1. 1950s: The Concept of AI is Born: The term', ' Artificial Intelligence (AI) and Human Intelligence (HI) are two distinct ways of processing information.', " A neural network is a type of machine learning model inspired by the human brain's structure and function", @@ -91,6 +92,7 @@ def test_models( generations.append(outputs[0].outputs[0].text) del model + print(generations) expected_strs = EXPECTED_STRS_MAP[model_name] for i in range(len(example_prompts)): generated_str = generations[i] diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py index 5952f382f8e8f..5a77193a03541 100644 --- a/vllm/model_executor/layers/quantization/fp8.py +++ b/vllm/model_executor/layers/quantization/fp8.py @@ -43,10 +43,7 @@ def get_supported_act_dtypes(cls) -> List[torch.dtype]: @classmethod def get_min_capability(cls) -> int: - # TODO: PyTorch 2.3.0+ is required to run FP8 on - # SM 89 (e.g. Ada) GPUs. 
Specifically, this PR has to - # be included: https://github.com/pytorch/pytorch/pull/118881 - return 90 + return 89 @classmethod def get_config_filenames(cls) -> List[str]: @@ -91,6 +88,24 @@ class Fp8LinearMethod(LinearMethodBase): def __init__(self, quant_config: Fp8Config): self.quant_config = quant_config + def _create_scale_param( + self, + scale_name: str, + layer: torch.nn.Module, + output_partition_sizes: List[int], + **extra_weight_attrs, + ) -> None: + scale = Parameter(torch.empty(len(output_partition_sizes), + dtype=torch.float32), + requires_grad=False) + layer.register_parameter(scale_name, scale) + set_weight_attrs( + scale, { + **extra_weight_attrs, + "fp8_scales_shard_indexer": + self.scales_shard_indexer, + }) + def create_weights( self, layer: torch.nn.Module, @@ -126,29 +141,19 @@ def create_weights( # Otherwise, wait until process_weights_after_loading. if self.quant_config.is_checkpoint_fp8_serialized: # WEIGHT SCALE - weight_scale = Parameter(torch.empty(len(output_partition_sizes), - dtype=torch.float32), - requires_grad=False) - layer.register_parameter("weight_scale", weight_scale) - set_weight_attrs( - weight_scale, { - **extra_weight_attrs, - "fp8_scales_shard_indexer": - self.scales_shard_indexer, - }) + self._create_scale_param( + scale_name="weight_scale", + layer=layer, + output_partition_sizes=output_partition_sizes, + **extra_weight_attrs) # ACTIVATION SCALE if self.quant_config.activation_scheme == "static": - act_scale = Parameter(torch.empty(len(output_partition_sizes), - dtype=torch.float32), - requires_grad=False) - layer.register_parameter("act_scale", act_scale) - set_weight_attrs( - act_scale, { - **extra_weight_attrs, - "fp8_scales_shard_indexer": - self.scales_shard_indexer, - }) + self._create_scale_param( + scale_name="act_scale", + layer=layer, + output_partition_sizes=output_partition_sizes, + **extra_weight_attrs) def scales_shard_indexer( self, param: torch.Tensor, loaded_weight: torch.Tensor, From 8e1ede1fc4625f9b35aa29ca21c5404c56987159 Mon Sep 17 00:00:00 2001 From: Robert Shaw Date: Tue, 30 Apr 2024 20:58:08 +0000 Subject: [PATCH 89/90] cleaned up --- tests/models/test_fp8.py | 51 +++++------- .../model_executor/layers/quantization/fp8.py | 81 +++++++++---------- 2 files changed, 59 insertions(+), 73 deletions(-) diff --git a/tests/models/test_fp8.py b/tests/models/test_fp8.py index 2ed08afc270a4..e87a1783a83f1 100644 --- a/tests/models/test_fp8.py +++ b/tests/models/test_fp8.py @@ -16,41 +16,30 @@ MAX_MODEL_LEN = 1024 MODELS = [ - "nm-testing/mistral-fp8-static", - "nm-testing/mistral-fp8-dynamic", - "mistralai/Mistral-7B-Instruct-v0.2", + "nm-testing/Meta-Llama-3-8B-Instruct-FP8", + "meta-llama/Meta-Llama-3-8B-Instruct", ] EXPECTED_STRS_MAP = { - "nm-testing/mistral-fp8-static": [ - ' VLLM (Vulcan Language Model) is an open-source inference and serving engine', - ' 1. 
1950s: The Concept of AI is Born: The term', - ' Artificial Intelligence (AI) and Human Intelligence (HI) are two distinct ways of processing information.', - ' A neural network is a type of machine learning model inspired by the structure and function of the human brain', - ' In the heart of the bustling city of Neo-Tokyo, nestled among the tow', - ' The COVID-19 pandemic has had a profound impact on global economic structures and has forced businesses to', - ' The Mona Lisa painting, created by the Italian artist Leonardo da Vinci between 15', - ' Japanese: 早く起きる' + "nm-testing/Meta-Llama-3-8B-Instruct-FP8": [ + 'LLaMA is a high-throughput and memory-efficient inference and serving engine for Large Language Models (', + 'Here are the major milestones in the development of artificial intelligence (AI) from 1950 to ', + 'Artificial intelligence (AI) and human intelligence (HI) differ significantly in how they process information.', + 'A neural network is a complex system modeled after the human brain, composed of interconnected nodes or "ne', + 'Zeta-5, a highly advanced robot designed for menial labor, whirred and beep', + 'The COVID-19 pandemic has had a profound impact on global economic structures and future business models. Here', + 'The Mona Lisa, painted by Leonardo da Vinci in the early 16th century, is one of', + 'Here are the translations:\n\n**Japanese:** (Haya tori, nemuri nemuri)\n\n**' ], - "nm-testing/mistral-fp8-dynamic": [ - ' VLLM (Vulcan Language Model) is an open-source, high-throughput', - ' 1. 1950s: The Concept of AI is Born: The term', - ' Artificial Intelligence (AI) and Human Intelligence (HI) are two distinct ways of processing information.', - " A neural network is a type of machine learning model inspired by the human brain's structure and function", - ' Once upon a time, in the heart of a bustling city, there was a robot named B', - ' The COVID-19 pandemic has had a profound impact on global economic structures and has forced businesses to', - ' The Mona Lisa painting, created by the Italian artist Leonardo da Vinci between 15', - ' Japanese: 早く起きる鳥は虫を取る (S' - ], - "mistralai/Mistral-7B-Instruct-v0.2": [ - ' VLLM (Vulcan Language Model) is an open-source, high-throughput', - ' 1. 1950s: The Concept of AI is Born: The term', - ' Artificial Intelligence (AI) and Human Intelligence (HI) are two distinct ways of processing information.', - " A neural network is a type of machine learning model inspired by the human brain's structure and function", - ' In the heart of the bustling city of Neo-Tokyo, nestled among the tow', - ' The COVID-19 pandemic has had a profound impact on global economic structures and has forced businesses to', - ' The Mona Lisa painting, created by the Italian artist Leonardo da Vinci between 15', - ' Japanese: 早く起きる鳥は虫を取る (S' + "meta-llama/Meta-Llama-3-8B-Instruct": [ + 'LLM (Large Language Model) is a type of artificial intelligence (AI) model that is trained', + 'Here are the major milestones in the development of artificial intelligence (AI) from 1950 to ', + 'Artificial intelligence (AI) and human intelligence (HI) differ significantly in how they process information.', + 'A neural network is a complex system modeled after the human brain, composed of interconnected nodes or "ne', + 'In the year 2154, the robotics lab at NeuroSpark Industries was on the cusp of', + 'The COVID-19 pandemic has had a profound impact on global economic structures and future business models. 
The', + 'The Mona Lisa, painted by Leonardo da Vinci in the early 16th century, is one of', + 'Here are the translations:\n\n**Japanese:** (Haya aki wa mushi o tsukamu' ], } diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py index 5a77193a03541..90bb98af9faf4 100644 --- a/vllm/model_executor/layers/quantization/fp8.py +++ b/vllm/model_executor/layers/quantization/fp8.py @@ -186,22 +186,28 @@ def process_weights_after_loading(self, layer: Module) -> None: layer.act_scale = None return - # TODO: cutlass kernels will remove the need for much of this logic. - # If the checkpoint is serialized fp8, we already loaded quantized, - # so, just cleanup the Parameters for easier use in apply(). + # If checkpoint is fp8, requantize the separately quantized logical + # weights into a single fp8 weight with a single weight scale. else: + # WEIGHT_SCALE / WEIGHT + # Loop over logical weights, requantizing with single scale. + max_w_scale = layer.weight_scale.max() + start = 0 + for idx, logical_width in enumerate(layer.logical_widths): + end = start + logical_width + weight_dq = per_tensor_dequantize(layer.weight[start:end, :], + layer.weight_scale[idx]) + + layer.weight[start:end, :] = per_tensor_quantize( + weight_dq, layer.weight_scale.max()) + start = end + layer.weight_scale = Parameter(max_w_scale, requires_grad=False) + # WEIGHT # Transpose weight for passing to torch._scaled_mm weight = layer.weight layer.weight = Parameter(weight.t(), requires_grad=False) - # WEIGHT_SCALE - # If all weight_scales are equal, use a single scale. - if all_close_1d(layer.weight_scale): - layer.weight_scale = Parameter(layer.weight_scale.max(), - requires_grad=False) - layer.logical_widths = None - # ACT_SCALE # Dynamic: set to None (required input to ops.scaled_fp8_quant). # Static: set to max of the act_scales (since they are equal). @@ -227,37 +233,14 @@ def apply(self, # If static, layer.act_scale is scalar and x_scale set to act_scale. qinput, x_scale = ops.scaled_fp8_quant(x, layer.act_scale) - # Case 1: we have 1 weight_scale for N logical weights. - if layer.logical_widths is None: - output, _ = torch._scaled_mm( - qinput, - layer.weight, - out_dtype=x.dtype, - scale_a=x_scale, - scale_b=layer.weight_scale, - ) - - # TODO: replace naive loop with cutlass gemm_dq w/ epilogue fusion. - # Case 2: We have N weight_scales for N logical weights. - else: - output = torch.empty(x.shape[0], - layer.weight.shape[1], - dtype=x.dtype, - device="cuda") - start = 0 - # Loop over the N logical shards. 
- for logical_width, w_scale in zip(layer.logical_widths, - layer.weight_scale): - end = start + logical_width - out, _ = torch._scaled_mm( - qinput, - layer.weight[:, start:end], - out_dtype=x.dtype, - scale_a=x_scale, - scale_b=w_scale, - ) - output[:, start:end] = out - start = end + # Fused GEMM_DQ + output, _ = torch._scaled_mm( + qinput, + layer.weight, + out_dtype=x.dtype, + scale_a=x_scale, + scale_b=layer.weight_scale, + ) if bias is not None: output.add_(bias) @@ -265,6 +248,20 @@ def apply(self, return output -def all_close_1d(x: torch.Tensor): +def all_close_1d(x: torch.Tensor) -> bool: assert len(x.shape) == 1 return all(torch.allclose(x[0], x[i]) for i in range(x.shape[0])) + + +def per_tensor_quantize(tensor: torch.Tensor, + inv_scale: float) -> torch.Tensor: + finfo = torch.finfo(torch.float8_e4m3fn) + qweight = (tensor / inv_scale).clamp(min=finfo.min, max=finfo.max) + return qweight.to(torch.float8_e4m3fn) + + +def per_tensor_dequantize(tensor: torch.Tensor, + inv_scale: float) -> torch.Tensor: + fake_qweight = tensor.to(torch.float16) + dq_weight = fake_qweight * inv_scale + return dq_weight From d067428c60bf6a1b7cca70f1d6672ebff25135b9 Mon Sep 17 00:00:00 2001 From: Robert Shaw <114415538+robertgshaw2-neuralmagic@users.noreply.github.com> Date: Tue, 30 Apr 2024 17:08:13 -0400 Subject: [PATCH 90/90] Update vllm/model_executor/layers/quantization/fp8.py Co-authored-by: Michael Goin --- vllm/model_executor/layers/quantization/fp8.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py index 90bb98af9faf4..b57e1dde81a5f 100644 --- a/vllm/model_executor/layers/quantization/fp8.py +++ b/vllm/model_executor/layers/quantization/fp8.py @@ -240,11 +240,9 @@ def apply(self, out_dtype=x.dtype, scale_a=x_scale, scale_b=layer.weight_scale, + bias=bias, ) - if bias is not None: - output.add_(bias) - return output
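
Taken together, patches 86-90 leave serialized-fp8 linear layers with a
three-step flow: load each shard's scalar scale through the
"fp8_scales_shard_indexer" hook, requantize the logical shards to one shared
weight scale after loading, then run a single fused GEMM in apply(). The
sketches below are illustrative Python, not part of the diffs.

The body of scales_shard_indexer is not shown in these hunks; a plausible
implementation, assuming integer shard ids for merged column-parallel layers
and "q"/"k"/"v" strings for fused QKV, is:

    from typing import Tuple, Union

    import torch

    def scales_shard_indexer(
            param_data: torch.Tensor, loaded_weight: torch.Tensor,
            shard_id: Union[str, int]) -> Tuple[torch.Tensor, torch.Tensor]:
        # Map a loaded scalar scale to its slot in the per-shard scale tensor.
        qkv_idxs = {"q": 0, "k": 1, "v": 2}
        if isinstance(shard_id, str):
            shard_id = qkv_idxs[shard_id]
        # One scalar scale per logical weight: select that element so the
        # caller's param_data.copy_(loaded_weight) fills exactly one slot.
        return param_data[shard_id], loaded_weight

This matches how patch 86 wires the special cases in linear.py: the unmerged
Column/RowParallelLinear loaders call the indexer with shard_id=0, the merged
column-parallel loader passes the integer loaded_shard_id, and the QKV loader
passes the "q"/"k"/"v" string.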
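
Patch 89 then drops the naive per-shard GEMM loop: inside
process_weights_after_loading it dequantizes each logical shard with its own
scale and requantizes everything against the single max scale. A minimal
standalone sketch of that step (the helper name requantize_to_shared_scale is
hypothetical; the patch inlines this logic via per_tensor_dequantize and
per_tensor_quantize):

    from typing import List, Tuple

    import torch

    FP8_DTYPE = torch.float8_e4m3fn

    def requantize_to_shared_scale(
            weight: torch.Tensor,        # (sum(logical_widths), K), fp8
            weight_scale: torch.Tensor,  # (num_shards,), float32
            logical_widths: List[int]) -> Tuple[torch.Tensor, torch.Tensor]:
        finfo = torch.finfo(FP8_DTYPE)
        max_w_scale = weight_scale.max()
        start = 0
        for idx, logical_width in enumerate(logical_widths):
            end = start + logical_width
            # Dequantize shard idx with its own scale...
            weight_dq = weight[start:end, :].to(
                torch.float16) * weight_scale[idx]
            # ...and requantize it with the shared max scale.
            weight[start:end, :] = (weight_dq / max_w_scale).clamp(
                min=finfo.min, max=finfo.max).to(FP8_DTYPE)
            start = end
        return weight, max_w_scale

Requantizing against the max scale gives up a little precision on the shards
whose original scales were smaller, but it collapses N weight scales into one,
which is what lets apply() issue a single fused GEMM instead of N narrow ones.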
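
Finally, patch 90 folds the bias into the torch._scaled_mm epilogue, so the
hot path is one quantize plus one fused GEMM. Sketched end to end in plain
torch (the inline activation quantization stands in for vLLM's
ops.scaled_fp8_quant kernel, and torch._scaled_mm is a private API whose
tuple return and keyword names here match the PyTorch versions these patches
target):

    from typing import Optional

    import torch

    def fp8_linear_apply(
            x: torch.Tensor,             # (M, K), fp16/bf16
            weight_t: torch.Tensor,      # (K, N) fp8 view, i.e. weight.t()
            weight_scale: torch.Tensor,  # scalar, float32
            act_scale: Optional[torch.Tensor] = None,  # scalar or None
            bias: Optional[torch.Tensor] = None) -> torch.Tensor:
        finfo = torch.finfo(torch.float8_e4m3fn)
        if act_scale is None:
            # Dynamic scheme: derive the activation scale from this batch.
            act_scale = x.abs().max().to(torch.float32) / finfo.max
        qinput = (x / act_scale).clamp(
            min=finfo.min, max=finfo.max).to(torch.float8_e4m3fn)
        # Fused GEMM + dequant, with the bias applied in the epilogue.
        output, _ = torch._scaled_mm(qinput, weight_t,
                                     out_dtype=x.dtype,
                                     scale_a=act_scale,
                                     scale_b=weight_scale,
                                     bias=bias)
        return output

Passing the transposed weight matters here: _scaled_mm expects a row-major by
column-major product, which is why process_weights_after_loading stores the
weight.t() view rather than a contiguous transpose.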