Skip to content

Commit

Permalink
Fixes mlcommons#1761: llama2 and mixtral runtime errors on CPU systems
Browse files Browse the repository at this point in the history
  • Loading branch information
arjunsuresh committed Jul 2, 2024
1 parent 9e2c9f6 commit 9dc997f
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 2 deletions.
5 changes: 4 additions & 1 deletion language/llama2-70b/SUT.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,10 @@ def load_model(self):
self.model = self.model.to(self.device) # Force CPU if your system has GPU and you specifically want CPU-only run

self.model.eval()
self.model = self.model.to(memory_format=torch.channels_last)
try: # on systems with low RAM, the call below raises an error because part of the model is offloaded to disk
self.model = self.model.to(memory_format=torch.channels_last)
except:
pass

self.tokenizer = AutoTokenizer.from_pretrained(
self.model_path,
Expand Down
4 changes: 3 additions & 1 deletion language/mixtral-8x7b/SUT.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,8 +301,10 @@ def load_model(self):
self.model = self.model.to(self.device)

self.model.eval()
if self.device != "cpu":
try: # on systems with low RAM, the call below raises an error because part of the model is offloaded to disk
self.model = self.model.to(memory_format=torch.channels_last)
except:
pass

self.tokenizer = AutoTokenizer.from_pretrained(
self.model_path,
Expand Down

0 comments on commit 9dc997f

Please sign in to comment.