Skip to content

Commit

Permalink
refactoring
Browse files Browse the repository at this point in the history
  • Loading branch information
masahi committed Jan 2, 2020
1 parent ef75257 commit 5908267
Show file tree
Hide file tree
Showing 6 changed files with 63 additions and 34 deletions.
62 changes: 48 additions & 14 deletions python/tvm/relay/quantize/_calibrate.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,10 @@
from .kl_divergence import _find_scale_by_kl


def _kl_scale(mod, dataset, split_by=-1):
logging.info("collecting statistics for calibration...")
func = _quantize.CreateStatsCollector(mod['main'])
def _get_profile_runtime(mod):
func = mod['main']
func = _quantize.CreateStatsCollector(func)

if tvm.target.current_target():
target = tvm.target.current_target()
ctx = tvm.context(target.target_name)
Expand All @@ -45,24 +46,58 @@ def _kl_scale(mod, dataset, split_by=-1):

with _transform.build_config(opt_level=3):
graph, lib, params = _build_module.build(func, target=target)

runtime = graph_runtime.create(graph, lib, ctx)
runtime.set_input(**params)

return runtime


def collect_stats(mod, dataset, chunk_by=-1):
"""Given an annotated graph, create a profile graph to collect profile data from the
calibration dataset. This pass collects the inputs of simulated_quantize ops into a tuple.
The simulated_quantize ops are rewritten to identity mode. The tuple is the output of the
profile graph.
Parameters
----------
mod: Module
The simulation graph after annotation.
dataset: Iterable[NDArray]
The calibration dataset.
chunk_by: int, optional
    The size of the chunk to be returned in one iteration. It is meant to be
    used for reducing memory usage. If not specified, samples for all layers
    are returned in a single chunk.
Returns
-------
ret: Iterable[list of ndarray]
    Output data of each layer, yielded in chunks of size chunk_by
"""
logging.info("collecting statistics for calibration...")
runtime = _get_profile_runtime(mod)
num_outputs = runtime.get_num_outputs()
chunk_by = num_outputs if chunk_by == -1 else chunk_by

scales = []
split_by = num_outputs if split_by == -1 else split_by
for i in range(0, num_outputs, split_by):
outputs = [[] for i in range(min(split_by, num_outputs - i))]
for i in range(0, num_outputs, chunk_by):
outputs = [[] for i in range(min(chunk_by, num_outputs - i))]
for batch in dataset:
runtime.set_input(**batch)
runtime.run()
for j in range(i, min(i+split_by, num_outputs)):
for j in range(i, min(i+chunk_by, num_outputs)):
outputs[j-i].append(runtime.get_output(j).asnumpy())
samples = [np.concatenate(output).reshape(-1) for output in outputs]
yield [np.concatenate(output).reshape(-1) for output in outputs]


def _kl_scale(mod, dataset):
cfg = quantize.current_qconfig()
chunk_by = cfg.calibrate_chunk_by
scales = []
for samples in collect_stats(mod, dataset, chunk_by):
logging.info("finding threshold with kl for calibration...")
with mp.Pool() as pool:
logging.info("finding threshold with kl for calibration...")
scales += list(pool.map(_find_scale_by_kl, samples))

def func(_):
Expand Down Expand Up @@ -147,13 +182,12 @@ def calibrate(dataset=None):
ret: Function
The module pass function.
"""
def wrapped_func(mod, ctx): # pylint: disable=unused-argument
def wrapped_func(mod, _):
"""make transform.module pass happy"""
cfg = quantize.current_qconfig()

if cfg.calibrate_mode == 'kl_divergence':
cfg = quantize.current_qconfig()
input_scale_func = _kl_scale(mod, dataset, cfg.calibrate_split_by)
input_scale_func = _kl_scale(mod, dataset)
elif cfg.calibrate_mode == 'global_scale':
input_scale_func = _global_scale
else:
Expand Down
3 changes: 2 additions & 1 deletion python/tvm/relay/quantize/kl_divergence.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@
from . import _quantize


def _find_scale_by_kl(arr, quantized_dtype='int8', num_bins=8001, num_quantized_bins=255):
def _find_scale_by_kl(arr, quantized_dtype='int8',
num_bins=8001, num_quantized_bins=255):
"""Given a tensor, find the optimal threshold for quantizing it.
The reference distribution is `q`, and the candidate distribution is `p`.
`q` is a truncated version of the original distribution.
Expand Down
2 changes: 1 addition & 1 deletion python/tvm/relay/quantize/quantize.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ class QConfig(NodeBase):
"round_for_shift": True,
"debug_enabled_ops": None,
"rounding": "UPWARD",
"calibrate_split_by": -1,
"calibrate_chunk_by": -1,
}

# pylint: disable=no-member
Expand Down
23 changes: 8 additions & 15 deletions src/relay/pass/quantize/calibrate.cc
Original file line number Diff line number Diff line change
Expand Up @@ -64,21 +64,14 @@ static std::vector<float> SmoothDistribution(const std::vector<float>& p,
return ret;
}

static float ComputeEntropy(std::vector<float>* p_ptr, std::vector<float>* q_ptr) {
std::vector<float>& p = *p_ptr;
std::vector<float>& q = *q_ptr;
CHECK_EQ(p.size(), q.size());
float p_sum = std::accumulate(p.begin(), p.end(), 0.f);
float q_sum = std::accumulate(q.begin(), q.end(), 0.f);
for (auto& it : p) {
it = it / p_sum;
}
for (auto& it : q) {
it = it / q_sum;
}
static float ComputeEntropy(float* p, float* q, size_t size) {
float p_sum = std::accumulate(p, p+size, 0.f);
float q_sum = std::accumulate(q, q+size, 0.f);
float ret = 0;
for (size_t i = 0; i < p.size(); i++) {
for (size_t i = 0; i < size; i++) {
CHECK(p[i] > 0 && q[i] > 0);
p[i] /= p_sum;
q[i] /= q_sum;
if (p[i] && q[i]) ret += p[i] * std::log(p[i] / q[i]);
}
return ret;
Expand All @@ -98,7 +91,7 @@ float MinimizeKL(const std::vector<int64_t>& hist,
thresholds[i - num_half_quantized_bins] = hist_edges[p_bin_idx_stop];

std::vector<int> sliced_nd_hist(p_bin_idx_stop - p_bin_idx_start);
std::vector<float> p(p_bin_idx_stop - p_bin_idx_start);
std::vector<float> p(sliced_nd_hist.size());
p[0] = 0;
p.back() = 0;
for (int j = 0; j < num_bins; j++) {
Expand Down Expand Up @@ -141,7 +134,7 @@ float MinimizeKL(const std::vector<int64_t>& hist,
if (!q.size()) {
divergence[i - num_half_quantized_bins] = std::numeric_limits<float>::infinity();
} else {
divergence[i - num_half_quantized_bins] = ComputeEntropy(&p, &q);
divergence[i - num_half_quantized_bins] = ComputeEntropy(p.data(), q.data(), p.size());
}
}
auto min_divergence_idx = std::distance(divergence.begin(),
Expand Down
4 changes: 2 additions & 2 deletions src/relay/pass/quantize/quantize.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ class QConfigNode : public Object {
bool round_for_shift = true;
Array<Expr> debug_enabled_ops = Array<Expr>(ObjectPtr<Object>(nullptr));
std::string rounding = "UPWARD";
int calibrate_split_by = -1;
int calibrate_chunk_by = -1;

void VisitAttrs(AttrVisitor* v) {
v->Visit("nbit_input", &nbit_input);
Expand All @@ -95,7 +95,7 @@ class QConfigNode : public Object {
v->Visit("round_for_shift", &round_for_shift);
v->Visit("debug_enabled_ops", &debug_enabled_ops);
v->Visit("rounding", &rounding);
v->Visit("calibrate_split_by", &calibrate_split_by);
v->Visit("calibrate_chunk_by", &calibrate_chunk_by);
}

static constexpr const char* _type_key = "relay.quantize.QConfig";
Expand Down
3 changes: 2 additions & 1 deletion tests/python/relay/test_pass_auto_quantize.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,8 @@ def test_calibrate_memory_bound():
dataset = get_calibration_dataset("data")
import multiprocessing
num_cpu = multiprocessing.cpu_count()
with relay.quantize.qconfig(calibrate_mode="kl_divergence", calibrate_split_by=num_cpu):
with relay.quantize.qconfig(calibrate_mode="kl_divergence",
calibrate_chunk_by=num_cpu):
relay.quantize.quantize(mod, params, dataset)


Expand Down

0 comments on commit 5908267

Please sign in to comment.