Skip to content

Commit

Permalink
[Ansor][AutoTVM v2.0] Phase 1: XGBoost Cost Model (apache#6270)
Browse files Browse the repository at this point in the history
* port xgb cost model

* add xgboost cost model

* fix lint

* address comments

* address comments

* Fix
  • Loading branch information
merrymercy authored and trevor-m committed Sep 3, 2020
1 parent ea17432 commit 860895d
Show file tree
Hide file tree
Showing 13 changed files with 779 additions and 57 deletions.
2 changes: 1 addition & 1 deletion include/tvm/auto_scheduler/feature.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
* \file auto_scheduler/feature.h
* \brief Feature extraction for the cost model.
* We extract one feature vector per BufferStoreNode statement in a TIR Stmt,
* so we call this feature as "Per Store" feature.
* so we call this feature as "per-store" feature.
* The cost model also does prediction for each BufferStoreNode statement and aggregates
* the predictions as the whole score for a TVM IR (Stmt).
*
Expand Down
4 changes: 2 additions & 2 deletions python/tvm/auto_scheduler/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@
from .auto_schedule import SearchTask, TuningOptions, HardwareParams, \
auto_schedule, EmptyPolicy, SketchPolicy
from .compute_dag import ComputeDAG
from .cost_model import RandomModel
from .measure import MeasureInput, LocalBuilder, LocalRunner, RPCRunner, \
from .cost_model import RandomModel, XGBModel
from .measure import MeasureInput, MeasureResult, LocalBuilder, LocalRunner, RPCRunner, \
LocalRPCMeasureContext
from .measure_record import RecordToFile, RecordReader, load_best, \
load_records, save_records
Expand Down
22 changes: 21 additions & 1 deletion python/tvm/auto_scheduler/auto_schedule.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,9 @@ def __init__(self, task, schedule_cost_model=RandomModel(), params=None, seed=No
seed or random.randint(1, 1 << 30), verbose, init_search_callbacks)

def generate_sketches(self, print_for_debug=False):
""" Generate the sketches, this is mainly used for debug.
""" Generate the sketches.
This python interface is mainly used for debugging and testing.
The actual search is all done in c++.
Parameters
----------
Expand All @@ -180,6 +182,24 @@ def generate_sketches(self, print_for_debug=False):
print(s)
return sketches

def sample_initial_population(self, pop_size):
    """Sample an initial population of states.

    This python interface is mainly used for debugging and testing.
    The actual search is all done in c++.

    Parameters
    ----------
    pop_size : int
        The size of the sampled population.

    Returns
    -------
    states : List[State]
        The sampled states.
    """
    # Hand the request straight to the FFI entry point and return its result.
    return _ffi_api.SketchPolicySampleInitialPopulation(self, pop_size)

@tvm._ffi.register_object("auto_scheduler.TuningOptions")
class TuningOptions(Object):
""" This controls the options of performance tuning.
Expand Down
1 change: 1 addition & 0 deletions python/tvm/auto_scheduler/cost_model/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,4 @@
""" Cost model that estimates the performance of programs """

from .cost_model import RandomModel
from .xgb_model import XGBModel
20 changes: 20 additions & 0 deletions python/tvm/auto_scheduler/cost_model/cost_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,5 +146,25 @@ def predict_stages(self, task, states):
-------
scores: List[float]
The predicted scores for all stages in all states in the packed format
Note
----
For faster data copy between c++ and python, the python part returns scores in a
single flattened array using a packed format. The c++ part then unpacks the flattened array.
The packed format is:
{
float scores[N]; // scores[i] is the score for states[i].
int n_stage_0; // the number of stages in states[0]
float stage_scores_0[n_stage_0]; // the scores for all stages in states[0]
int n_stage_1; // the number of stages in states[1]
float stage_scores_1[n_stage_1]; // the scores for all stages in states[1]
...
int n_stage_i; // the number of stages in states[i]
float stage_scores_i[n_stage_i]; // the scores for all stages in states[i]
... // until i == N - 1
}
To implement this format, we also store int as float, so we can store all numbers
into a single float array.
"""
raise NotImplementedError
Loading

0 comments on commit 860895d

Please sign in to comment.