Skip to content

Commit

Permalink
Fixes mlcommons#1761: llama2 and mixtral runtime errors on CPU systems
Browse files Browse the repository at this point in the history
  • Loading branch information
arjunsuresh committed Jul 2, 2024
1 parent 9e2c9f6 commit 9dc997f
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 2 deletions.
5 changes: 4 additions & 1 deletion language/llama2-70b/SUT.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,10 @@ def load_model(self):
self.model = self.model.to(self.device) # Force CPU if your system has GPU and you specifically want CPU-only run

self.model.eval()
self.model = self.model.to(memory_format=torch.channels_last)
try: # on systems with low RAM, the call below raises an error because part of the model is offloaded to disk
self.model = self.model.to(memory_format=torch.channels_last)
except:
pass

self.tokenizer = AutoTokenizer.from_pretrained(
self.model_path,
Expand Down
4 changes: 3 additions & 1 deletion language/mixtral-8x7b/SUT.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,8 +301,10 @@ def load_model(self):
self.model = self.model.to(self.device)

self.model.eval()
if self.device != "cpu":
try: # on systems with low RAM, the call below raises an error because part of the model is offloaded to disk
self.model = self.model.to(memory_format=torch.channels_last)
except:
pass

self.tokenizer = AutoTokenizer.from_pretrained(
self.model_path,
Expand Down

0 comments on commit 9dc997f

Please sign in to comment.