-
Notifications
You must be signed in to change notification settings - Fork 258
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Signed-off-by: Kaihui-intel <[email protected]>
- Loading branch information
1 parent
e470f6c
commit b401b02
Showing
11 changed files
with
926 additions
and
54 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,53 +1,67 @@ | ||
{ | ||
"pytorch": { | ||
"gpt_j_ipex":{ | ||
"model_src_dir": "nlp/huggingface_models/language-modeling/quantization/static_quant", | ||
"dataset_location": "", | ||
"input_model": "", | ||
"main_script": "run_clm_no_trainer.py", | ||
"batch_size": 1 | ||
}, | ||
"gpt_j_ipex_sq":{ | ||
"model_src_dir": "nlp/huggingface_models/language-modeling/quantization/smooth_quant", | ||
"dataset_location": "", | ||
"input_model": "", | ||
"main_script": "run_clm_no_trainer.py", | ||
"batch_size": 1 | ||
}, | ||
"llama2_7b_ipex":{ | ||
"model_src_dir": "nlp/huggingface_models/language-modeling/quantization/static_quant", | ||
"dataset_location": "", | ||
"input_model": "", | ||
"main_script": "run_clm_no_trainer.py", | ||
"batch_size": 1 | ||
}, | ||
"llama2_7b_ipex_sq":{ | ||
"model_src_dir": "nlp/huggingface_models/language-modeling/quantization/smooth_quant", | ||
"dataset_location": "", | ||
"input_model": "", | ||
"main_script": "run_clm_no_trainer.py", | ||
"batch_size": 1 | ||
}, | ||
"opt_125m_ipex":{ | ||
"model_src_dir": "nlp/huggingface_models/language-modeling/quantization/static_quant", | ||
"dataset_location": "", | ||
"input_model": "", | ||
"main_script": "run_clm_no_trainer.py", | ||
"batch_size": 8 | ||
}, | ||
"opt_125m_ipex_sq":{ | ||
"model_src_dir": "nlp/huggingface_models/language-modeling/quantization/smooth_quant", | ||
"dataset_location": "", | ||
"input_model": "", | ||
"main_script": "run_clm_no_trainer.py", | ||
"batch_size": 8 | ||
}, | ||
"dlrm_ipex": { | ||
"model_src_dir": "recommendation/dlrm/static_quant/ipex", | ||
"dataset_location": "/mnt/local_disk3/dataset/dlrm/dlrm/input", | ||
"input_model": "/mnt/local_disk3/dataset/dlrm/dlrm/dlrm_weight/tb00_40M.pt", | ||
"main_script": "dlrm_s_pytorch.py", | ||
"batch_size": 16384 | ||
} | ||
} | ||
} | ||
{ | ||
"pytorch": { | ||
"gpt_j_ipex":{ | ||
"model_src_dir": "nlp/huggingface_models/language-modeling/quantization/static_quant", | ||
"dataset_location": "", | ||
"input_model": "", | ||
"main_script": "run_clm_no_trainer.py", | ||
"batch_size": 1 | ||
}, | ||
"gpt_j_ipex_sq":{ | ||
"model_src_dir": "nlp/huggingface_models/language-modeling/quantization/smooth_quant", | ||
"dataset_location": "", | ||
"input_model": "", | ||
"main_script": "run_clm_no_trainer.py", | ||
"batch_size": 1 | ||
}, | ||
"llama2_7b_ipex":{ | ||
"model_src_dir": "nlp/huggingface_models/language-modeling/quantization/static_quant", | ||
"dataset_location": "", | ||
"input_model": "", | ||
"main_script": "run_clm_no_trainer.py", | ||
"batch_size": 1 | ||
}, | ||
"llama2_7b_ipex_sq":{ | ||
"model_src_dir": "nlp/huggingface_models/language-modeling/quantization/smooth_quant", | ||
"dataset_location": "", | ||
"input_model": "", | ||
"main_script": "run_clm_no_trainer.py", | ||
"batch_size": 1 | ||
}, | ||
"opt_125m_ipex":{ | ||
"model_src_dir": "nlp/huggingface_models/language-modeling/quantization/static_quant", | ||
"dataset_location": "", | ||
"input_model": "", | ||
"main_script": "run_clm_no_trainer.py", | ||
"batch_size": 8 | ||
}, | ||
"opt_125m_ipex_sq":{ | ||
"model_src_dir": "nlp/huggingface_models/language-modeling/quantization/smooth_quant", | ||
"dataset_location": "", | ||
"input_model": "", | ||
"main_script": "run_clm_no_trainer.py", | ||
"batch_size": 8 | ||
}, | ||
"dlrm_ipex": { | ||
"model_src_dir": "recommendation/dlrm/static_quant/ipex", | ||
"dataset_location": "/mnt/local_disk3/dataset/dlrm/dlrm/input", | ||
"input_model": "/mnt/local_disk3/dataset/dlrm/dlrm/dlrm_weight/tb00_40M.pt", | ||
"main_script": "dlrm_s_pytorch.py", | ||
"batch_size": 16384 | ||
}, | ||
"resnet18_pt2e_static":{ | ||
"model_src_dir": "cv/static_quant", | ||
"dataset_location": "/tf_dataset/pytorch/ImageNet/raw", | ||
"input_model": "", | ||
"main_script": "main.py", | ||
"batch_size": 1 | ||
}, | ||
"opt_125m_pt2e_static":{ | ||
"model_src_dir": "nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e", | ||
"dataset_location": "", | ||
"input_model": "", | ||
"main_script": "run_clm_no_trainer.py", | ||
"batch_size": 1 | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
# ImageNet Quantization | ||
|
||
This implements quantization of popular model architectures, such as ResNet, on the ImageNet dataset. | ||
|
||
## Requirements | ||
|
||
- Install requirements | ||
- `pip install -r requirements.txt` | ||
- Download the ImageNet dataset from http://www.image-net.org/ | ||
- Then, move and extract the training and validation images to labeled subfolders, using [the following shell script](extract_ILSVRC.sh) | ||
|
||
## Quantization | ||
|
||
To quantize a model and validate its accuracy, run `main.py` with the desired model architecture and the path to the ImageNet dataset: | ||
|
||
```bash | ||
python main.py -a resnet18 [imagenet-folder with train and val folders] -q -e | ||
``` | ||
|
||
|
||
## Use Dummy Data | ||
|
||
The ImageNet dataset is large and time-consuming to download. To get started quickly, run `main.py` with dummy data by passing `--dummy`. Note that the loss and accuracy values are meaningless in this case. | ||
|
||
```bash | ||
python main.py -a resnet18 --dummy -q -e | ||
``` |
80 changes: 80 additions & 0 deletions
80
examples/3.x_api/pytorch/cv/static_quant/extract_ILSVRC.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
#!/bin/bash | ||
# | ||
# script to extract ImageNet dataset | ||
# ILSVRC2012_img_train.tar (about 138 GB) | ||
# ILSVRC2012_img_val.tar (about 6.3 GB) | ||
# make sure ILSVRC2012_img_train.tar & ILSVRC2012_img_val.tar are in your current directory | ||
# | ||
# Adapted from: | ||
# https://github.com/facebook/fb.resnet.torch/blob/master/INSTALL.md | ||
# https://gist.github.com/BIGBALLON/8a71d225eff18d88e469e6ea9b39cef4 | ||
# | ||
# imagenet/train/ | ||
# ├── n01440764 | ||
# │ ├── n01440764_10026.JPEG | ||
# │ ├── n01440764_10027.JPEG | ||
# │ ├── ...... | ||
# ├── ...... | ||
# imagenet/val/ | ||
# ├── n01440764 | ||
# │ ├── ILSVRC2012_val_00000293.JPEG | ||
# │ ├── ILSVRC2012_val_00002138.JPEG | ||
# │ ├── ...... | ||
# ├── ...... | ||
# | ||
# | ||
# Make imagenet directory | ||
# | ||
mkdir imagenet | ||
# | ||
# Extract the training data: | ||
# | ||
# Create train directory; move .tar file; change directory | ||
mkdir imagenet/train && mv ILSVRC2012_img_train.tar imagenet/train/ && cd imagenet/train | ||
# Extract training set; remove compressed file | ||
tar -xvf ILSVRC2012_img_train.tar && rm -f ILSVRC2012_img_train.tar | ||
# | ||
# At this stage imagenet/train will contain 1000 compressed .tar files, one for each category | ||
# | ||
# For each .tar file: | ||
# 1. create directory with same name as .tar file | ||
# 2. extract and copy contents of .tar file into directory | ||
# 3. remove .tar file | ||
find . -name "*.tar" | while read NAME ; do mkdir -p "${NAME%.tar}"; tar -xvf "${NAME}" -C "${NAME%.tar}"; rm -f "${NAME}"; done | ||
# | ||
# This results in a training directory like so: | ||
# | ||
# imagenet/train/ | ||
# ├── n01440764 | ||
# │ ├── n01440764_10026.JPEG | ||
# │ ├── n01440764_10027.JPEG | ||
# │ ├── ...... | ||
# ├── ...... | ||
# | ||
# Change back to original directory | ||
cd ../.. | ||
# | ||
# Extract the validation data and move images to subfolders: | ||
# | ||
# Create validation directory; move .tar file; change directory; extract validation .tar; remove compressed file | ||
mkdir imagenet/val && mv ILSVRC2012_img_val.tar imagenet/val/ && cd imagenet/val && tar -xvf ILSVRC2012_img_val.tar && rm -f ILSVRC2012_img_val.tar | ||
# get script from soumith and run; this script creates all class directories and moves images into corresponding directories | ||
wget -qO- https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh | bash | ||
# | ||
# This results in a validation directory like so: | ||
# | ||
# imagenet/val/ | ||
# ├── n01440764 | ||
# │ ├── ILSVRC2012_val_00000293.JPEG | ||
# │ ├── ILSVRC2012_val_00002138.JPEG | ||
# │ ├── ...... | ||
# ├── ...... | ||
# | ||
# | ||
# Check total files after extract | ||
# | ||
# $ find train/ -name "*.JPEG" | wc -l | ||
# 1281167 | ||
# $ find val/ -name "*.JPEG" | wc -l | ||
# 50000 | ||
# |
Oops, something went wrong.