diff --git a/.env b/.env new file mode 100644 index 000000000..d00fca5da --- /dev/null +++ b/.env @@ -0,0 +1,24 @@ +#!/bin/bash +# +# This script sets up the shell environment for this project. +# +# Usage: +# $ source .env + +export PHD="$(realpath $(dirname "$0"))" + +if [[ -f "$HOME/.cache/phd/tools/py/venv/phd/bin/activate" ]]; then + # If there is a virtualenv, use it. Note that even if it does exist, bazel + # will go ahead and ignore it, so we still need to rely on the system python + # being the required version. + . "$HOME/.cache/phd/tools/py/venv/phd/bin/activate" +fi + +# Note(github.com/ChrisCummins/phd/issues/55): On macOS, custom LDFLAGS and +# CPPFLAGS are required to pip build MySQLdb: +export LDFLAGS="-L/usr/local/opt/openssl/lib" +export CPPFLAGS="-I/usr/local/opt/openssl/include" + +# Increase the timeout on docker image pulls from the default 600s. +# See: https://github.com/bazelbuild/rules_docker +export PULLER_TIMEOUT=3600 diff --git a/.gitignore b/.gitignore index 18f631815..5d170b2a7 100644 --- a/.gitignore +++ b/.gitignore @@ -18,11 +18,7 @@ /node_modules # In-tree generated files. -/.env /bootstrap.sh -/config.pbtxt -/requirements.txt -/third_party/py/tensorflow/BUILD /learn/docker/clgen/*.tar.bz2 /experimental/deeplearning/clgen/docker_worker/*.tar.bz2 /experimental/deeplearning/clgen/docker_worker/cache diff --git a/.travis.yml b/.travis.yml index 56fe3ff36..8c34371c1 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,8 +14,7 @@ language: env: global: - # ./configure script symlinks python3 to ~/.local/bin/python, and macos - # image needs GNU tools in PATH, rather than BSD versions. + # MacOS needs GNU tools in PATH, rather than BSD versions. - PATH="$HOME/.local/bin:/usr/local/opt/coreutils/libexec/gnubin/stat:/usr/local/opt/findutils/libexec/gnubin:/usr/local/opt/gnu-sed/libexec/gnubin:$PATH" # Environment variables needed for zlib. 
# See: https://github.com/pyenv/pyenv/issues/530 @@ -50,7 +49,6 @@ install: # Swap out GCC for GCC-5 on linux to enable support for --std=c++14. # See: https://gist.github.com/cotsog/3ce84675af0d74438d91 - if [[ $TRAVIS_OS_NAME == "linux" ]]; then sudo unlink /usr/bin/gcc && sudo ln -s /usr/bin/gcc-5 /usr/bin/gcc; fi - - ./configure --noninteractive # Symlink the system python2 in place so that we bypass the pyenv shim. - if [[ $TRAVIS_OS_NAME == "linux" ]]; then ln -s /usr/bin/python2 $HOME/.local/bin; fi diff --git a/BUILD b/BUILD index c0b4b91d6..ff2c552e4 100644 --- a/BUILD +++ b/BUILD @@ -61,12 +61,6 @@ sh_binary( srcs = ["make_build_info_pbtxt.sh"], ) -filegroup( - name = "config", - srcs = ["config.pbtxt"], - visibility = ["//visibility:public"], -) - proto_library( name = "config_pb", srcs = ["config.proto"], @@ -103,38 +97,6 @@ py_library( ], ) -py_test( - name = "configure_test", - srcs = ["configure_test.py"], - data = [":configure_py"], - deps = [ - "//labm8/py:app", - "//labm8/py:bazelutil", - "//labm8/py:test", - ], -) - -py_library( - name = "getconfig", - srcs = ["getconfig.py"], - visibility = ["//visibility:public"], - deps = [ - "//:config_pb_py", - "//:config_pbtxt_py", - "//labm8/py:pbutil", - ], -) - -py_test( - name = "getconfig_test", - srcs = ["getconfig_test.py"], - deps = [ - ":getconfig", - "//labm8/py:app", - "//labm8/py:test", - ], -) - # Golang. # Gazelle directive: # gazelle:prefix github.com/ChrisCummins/phd diff --git a/README.md b/README.md index 356e27b64..0b03f7039 100644 --- a/README.md +++ b/README.md @@ -28,43 +28,16 @@ programs it learns from. ## Getting Started -Configure the build and answer the yes/no questions. The default answers should -be fine: +Build CLgen using: ```sh -$ ./configure -``` - -Note that CUDA support requires CUDA to have been installed separately, -see the [TensorFlow build docs](https://www.tensorflow.org/install/) for -instructions. 
CUDA support has only been tested for Linux builds, not macOS or -Docker containers. - -```sh -$ bazel build //deeplearning/clgen -``` - -The configure process generates a `bootstrap.sh` script which will install the -required dependent packages. Since installing these packages will affect the -global state of your system, and may requires root access, inspect this script -carefully. Once you're happy to proceed, run it using: - -```sh -$ bash ./bootstrap.sh -``` - -Finally, we must set up the shell environment for running bazel. The file `.env` -is created by the configure process and must be sourced for every shell we want -to use bazel with: - -```sh -$ source $PWD/.env +$ bazel build -c opt //deeplearning/clgen ``` Use our tiny example dataset to train and sample your first CLgen model: ```sh -$ bazel run //deeplearning/clgen -- \ +$ bazel-bin/deeplearning/clgen -- \ --config $PWD/deeplearning/clgen/tests/data/tiny/config.pbtxt ``` diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 000000000..e9a4f035e --- /dev/null +++ b/requirements.txt @@ -0,0 +1,216 @@ +# Python package dependencies. +# This file is copied to $ROOT/requirements.txt during configure. The reason +# for this is because the TensorFlow package is replaced with tensorflow-gpu +# if --with-cuda is enabled. +absl-py==0.7.0 +appdirs==1.4.3 +appnope==0.1.0 +ascii_art==0.1.0 +aspy.refactor-imports==1.1.0 +astroid==1.6.1 +attrs==19.3.0 +autoenv==1.0.0 +backports-abc==0.5 +backports.functools-lru-cache==1.5 +backports.shutil-get-terminal-size==1.0.0 +bibtexparser==1.1.0 +bleach==1.5.0 +bokeh==1.0.2 +cached-property==1.5.1 +certifi==2018.4.16 # Dependency of requests. +cffi==1.11.5 +chardet==3.0.4 # Dependency of requests. +checksumdir==1.0.5 +Click==7.0 +configparser==3.5.0 +coverage==4.5.1 # Needed by pytest-cov. 
+cpplint==1.3.0 +cycler==0.10.0 +dash==0.39.0 +dash-core-components==0.44.0 +dash-html-components==0.14.0 +dash-renderer==0.20.0 +dash-table==3.6.0 +dataclasses==0.6 +decorator==4.3.0 +detect-secrets==0.12.4 +# The graph_nets package depends on a specific version of dm-sonnet. See: +# https://github.com/deepmind/graph_nets/blob/master/setup.py +dm-sonnet==1.23 +docutils==0.14 +editdistance==0.3.1 +entrypoints==0.2.3 +enum34==1.1.6 +fasteners==0.15 +flaky==3.6.1 +Flask==1.0.2 +Flask-Compress==1.4.0 +flask-cors==3.0.6 +Flask-SQLalchemy==2.4.0 +fs==2.4.8 +fs.sshfs==0.11.1 +funcsigs==1.0.2 +futures==3.1.1 +fuzzywuzzy==0.16.0 +gast==0.2.2 # Dependency of tensorflow. +GitPython==2.1.11 +GPUtil==1.4.0 +graph_nets==1.0.2 +graphviz==0.9 +grpcio-tools==1.18.0 +grpcio==1.18.0 +gspread==3.1.0 +gspread-dataframe==3.0.3 +gym==0.10.5 +h5py==2.9.0 +html5lib==0.9999999 +httplib2==0.14 # Needed by oauth2client +humanize==0.5.1 +importlib-metadata==0.23 # Needed by pytest. +idna==2.6 # Dependency of requests. +inotify==0.2.10 +ipaddress==1.0.23 # Dependency of urllib3 +ipdb==0.11 +ipykernel==4.8.2 +ipython-genutils==0.2.0 +ipython==5.7.0 +ipywidgets==7.1.2 +isort==4.3.4 +itsdangerous==1.1.0 # Dependency of dash. +jedi==0.11.1 +jinja2==2.10.1 +jsonschema==2.6.0 +jupyter-client==5.2.2 +jupyter-console==5.2.0 +jupyter-core==4.4.0 +jupyter==1.0.0 +# Support for local runtimes with Google Colaboratory. See: +# https://research.google.com/colaboratory/local-runtimes.html +jupyter_http_over_ws==0.0.1a3 +jwt==0.6.1 # Dependency of PyGithub. +Keras==2.3.1 +kiwisolver==1.0.1 +lazy-object-proxy==1.3.1 +lru-dict==1.1.6 +MarkupSafe==1.0 +matplotlib==2.2.0rc1 +mccabe==0.6.1 +memory-profiler==0.55.0 +mistune==0.8.3 +monotonic==1.5 # Needed by fasteners. +more-itertools==7.2.0 # Needed by pytest. +mypy-extensions==0.4.3 +mysqlclient==1.4.2.post1 +nbconvert==5.3.1 +nbformat==4.4.0 # Needed by notebook. 
+networkx==2.2 +notebook==5.7.8 +numpy==1.16.4 +oauth2client==4.1.3 +packaging==19.2 # Needed by pytest. +pandas==0.24.1 +pandas-bokeh==0.0.2 +pandocfilters==1.4.2 +paramiko==2.6.0 # Needed by fs.ssh. +parso==0.1.1 +pathlib2==2.3.2 +pathlib==1.0.1 +pathspec==0.7.0 +pexpect==4.4.0 +pickleshare==0.7.4 +plotly==3.7.0 +pluggy==0.13.0 # Needed by pytest. +ply==3.11 +portpicker==1.3.1 +prettytable==0.7.2 +progressbar2==3.37.1 +prometheus_client==0.6.0 # Needed by notebook. +prompt-toolkit==1.0.15 +property-cached==1.6.3 +protobuf==3.6.1 +psutil==5.4.5 +psycopg2-binary==2.8.3 +ptyprocess==0.5.2 +py-cpuinfo==3.3.0 +py==1.5.2 +# At the time of writing (2018-05-10), the most recent version of pycparser is +# version 2.18, however, it breaks cldrive with error: +# Traceback (most recent call last): +# File "/phd/gpu/cldrive/legacy/__init__.py", line 7, in +# from gpu.cldrive.args import * +# File "/phd/gpu/cldrive/legacy/args.py", line 242, in +# __parser = OpenCLCParser() +# File "/site-packages/pycparserext/ext_c_parser.py", line 47, in __init__ +# debug=yacc_debug, write_tables=False) +# File "/site-packages/pycparser/ply/yacc.py", line 3426, in yacc +# raise YaccError('Unable to build parser') +# pycparser.ply.yacc.YaccError: Unable to build parser +pycparser==2.17 +pycparserext==2016.2 +pydot==1.4.1 +PyGithub==1.39 +Pygments==2.2.0 +pybind11==2.4.2 # Needed by pyopencl. +pyfiglet==0.8.post1 +pylint==1.8.2 +pyopencl==2018.2.5 +pyOpenSSL==19.1.0 # Needed by urllib3. +pyparsing==2.2.0 +pytest-benchmark==3.2.2 +pytest-cov==2.8.1 +pytest-mock==1.12.1 +pytest-shard==0.1.1 +pytest==5.3.1 +python-dateutil==2.6.1 +python-Levenshtein==0.12.0 +python-utils==2.3.0 +python-xmp-toolkit==2.0.1 +pytools==2018.1 +pytz==2018.3 +PyYAML==4.2b4 +pyzmq==17.0.0 +pyasn1==0.4.7 # Needed by oauth2client. +pyasn1_modules==0.2.7 # Needed by oauth2client. +qtconsole==4.3.1 +regex==2019.11.1 +reorder-python-imports==1.9.0 +requests==2.20.1 +retrying==1.3.3 # Needed by plotly. 
+rsa==4.0 # Needed by oauth2client. +scandir==1.7 +scikit-learn==0.20.3 +scikit-image==0.14.2 +scipy==1.2.1 +seaborn==0.9.0 +Send2Trash==1.5.0 +simplegeneric==0.8.1 +singledispatch==3.4.0.3 +six==1.11.0 # Needed by absl. +smmap2==2.0.3 +SQLAlchemy==1.3.10 +sqlparse==0.3.0 +statistics==1.0.3.5 +subprocess32==3.5.0 +tabulate==0.8.5 +tensorflow==1.14.0 # NOTE: Must be installed manually with `pip install`. +terminado==0.8.1 +testpath==0.3.1 +TogglPy==0.1.1 +toml==0.10.0 +torch==1.3.0 +tornado==5.0 +tqdm==4.38.0 +traitlets==4.3.2 +trash-cli==0.17.1.14 +typed-ast==1.4.1 +typing-extensions==3.7.4.1 +umap==0.1.1 +urllib3==1.24.2 # Needed by requests. +virtualenv==15.1.0 +wcwidth==0.1.7 +webencodings==0.5.1 +Werkzeug==0.15.3 # Needed by Flask. +wget==3.2 +widgetsnbextension==3.1.4 +wrapt==1.11.2 +zipp==0.6.0 # Needed by pytest. diff --git a/third_party/py/README.md b/third_party/py/README.md index 81805fd3e..fc364e3ef 100644 --- a/third_party/py/README.md +++ b/third_party/py/README.md @@ -8,9 +8,9 @@ group multiple packages under a single bazel target (see ## To add a package -1. Add the new pip packages to `//tools/requirements.txt`. +1. Add the new pip packages to `//:requirements.txt`. 1. Create a package in this directory which contains a single `py_library` rule - and pulls in the new pip package as a `dep` (copy any of the existing + and pulls in the new pip package as a `dep` (copy any of the existing packages as a starting point). 1. Add the `//third_party/py/` dep to any python targets which require this new module. diff --git a/third_party/py/tensorflow/BUILD b/third_party/py/tensorflow/BUILD new file mode 100644 index 000000000..5c0c43381 --- /dev/null +++ b/third_party/py/tensorflow/BUILD @@ -0,0 +1,54 @@ +# A wrapper around tensorflow pip package to support optional gpu. +# +# If a python target requires TensorFlow, it should depend on this package +# (i.e. //third_party/py/tensorflow), instead of requirement("tensorflow"). 
+# This is because the pip package for TensorFlow with CUDA support has a +# different name. +# +# Use: +# +# from third_party.py.tensorflow import tf +# +# to import Tensorflow rather than "import tensorflow as tf" due to a bug in +# packing Tensorflow as a pip dependency for bazel. +# See github.com/bazelbuild/rules_python/issues/71 + +load("@requirements//:requirements.bzl", "requirement") + +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) # Apache 2.0. + +exports_files(["LICENSE"]) + +py_library( + name = "tensorflow", + srcs = [":tf.py"], + deps = [ + # rules_pip fails for TensorFlow, causing an empty package to be + # downloaded. Because of this, we require the user to manually install + # the version of TensorFlow specified in requirements.txt: + # requirement("tensorflow"), + "//third_party/py/numpy", + "//third_party/py/protobuf", + ], +) + +py_test( + name = "smoke_test", + srcs = ["smoke_test.py"], + deps = [ + ":tensorflow", + "//labm8/py:app", + "//labm8/py:test", + "//third_party/py/gputil", + "//third_party/py/numpy", + "//third_party/py/pytest", + ], +) + +py_test( + name = "overview", + srcs = ["overview.py"], + deps = [":tensorflow"], +) diff --git a/tools/BUILD b/tools/BUILD index 6e17e3ec3..580b47156 100644 --- a/tools/BUILD +++ b/tools/BUILD @@ -7,7 +7,6 @@ sh_binary( srcs = ["whoami.sh"], data = [ "//:build_info_pbtxt", - "//:config", ], ) diff --git a/tools/whoami.sh b/tools/whoami.sh index e6dd6bc1d..08aa97b23 100755 --- a/tools/whoami.sh +++ b/tools/whoami.sh @@ -1,4 +1,3 @@ #!/usr/bin/env bash -cat config.pbtxt | grep -v '^#' cat build_info.pbtxt | grep -v '^#' diff --git a/tools/workspace_status.sh b/tools/workspace_status.sh index 4bc292aa9..c1fa83b8b 100755 --- a/tools/workspace_status.sh +++ b/tools/workspace_status.sh @@ -11,10 +11,11 @@ set -eu echo "SECONDS_SINCE_EPOCH $(date +%s)" echo "RANDOM_HASH $(cat /dev/urandom | head -c16 | md5sum 2>/dev/null | cut -f1 -d' ')" -# Stable keys. +# Stable keys. 
See the BuildInfo message schema in //:config.proto for a +# description of each field. +echo "STABLE_UNSAFE_WORKSPACE" $(pwd) echo "STABLE_HOST $(hostname)" echo "STABLE_USER $(id -un)" -echo "STABLE_REPO_ROOT" $(pwd) echo "STABLE_VERSION" $(cat version.txt) echo "STABLE_GIT_COMMIT_HASH $(git rev-parse HEAD)"