diff --git a/docs/api-docs/__init__.py b/docs/__init__.py similarity index 100% rename from docs/api-docs/__init__.py rename to docs/__init__.py diff --git a/docs/api-docs/_templates/autosummary/base.rst b/docs/_templates/autosummary/base.rst similarity index 100% rename from docs/api-docs/_templates/autosummary/base.rst rename to docs/_templates/autosummary/base.rst diff --git a/docs/api-docs/_templates/autosummary/class.rst b/docs/_templates/autosummary/class.rst similarity index 100% rename from docs/api-docs/_templates/autosummary/class.rst rename to docs/_templates/autosummary/class.rst diff --git a/docs/api-docs/_templates/autosummary/module.rst b/docs/_templates/autosummary/module.rst similarity index 100% rename from docs/api-docs/_templates/autosummary/module.rst rename to docs/_templates/autosummary/module.rst diff --git a/docs/api-docs/index.rst b/docs/api-docs/index.rst index 8700953e..215ecb9e 100644 --- a/docs/api-docs/index.rst +++ b/docs/api-docs/index.rst @@ -1,11 +1,5 @@ -Guppy Compiler API Documentation -================================ - -This is the API documentation for the Guppy compiler. - -.. note:: - This page is designed for contributors to the Guppy compiler, not users of the language. - See TODO for the language documentation. +Compiler API Docs +================= .. autosummary:: :toctree: generated diff --git a/docs/build.sh b/docs/build.sh index 794cfa11..63277a29 100755 --- a/docs/build.sh +++ b/docs/build.sh @@ -4,4 +4,4 @@ mkdir build touch build/.nojekyll # Disable jekyll to keep files starting with underscores -uv run --extra docs sphinx-build -b html ./api-docs ./build/api-docs +uv run --extra docs sphinx-build -b html . 
./build diff --git a/docs/api-docs/conf.py b/docs/conf.py similarity index 96% rename from docs/api-docs/conf.py rename to docs/conf.py index 3bacce41..475c6e8a 100644 --- a/docs/api-docs/conf.py +++ b/docs/conf.py @@ -33,7 +33,7 @@ }, } -html_static_path = ["../_static"] +html_static_path = ["_static"] html_css_files = ["custom.css"] autosummary_generate = True @@ -104,4 +104,5 @@ def resolve_type_aliases(app, env, node, contnode): def setup(app): - app.connect("missing-reference", resolve_type_aliases) + pass + # app.connect("missing-reference", resolve_type_aliases) diff --git a/docs/dev-guide/checking.rst b/docs/dev-guide/checking.rst new file mode 100644 index 00000000..0381feaf --- /dev/null +++ b/docs/dev-guide/checking.rst @@ -0,0 +1,108 @@ +Checking a Function +=================== + +This chapter walks through the various phases of type and linearity checking for function definitions. + +.. toctree:: + :maxdepth: 1 + + checking/types + + +Our starting point is a :class:`.ParsedFunctionDef` storing the parsed abstract syntax tree of our function in the form of an :class:`ast.FunctionDef`. +The top-level entry point for the checking logic is :meth:`.ParsedFunctionDef.check` and subsequent code in the :mod:`.func_checker` module. + + +1. CFG Construction +------------------- + +Before doing any checking, we actually begin by transforming the function into a control-flow graph. +This is slightly unusual compared to other compilers that do this at a later stage (usually after semantic analysis). +In Guppy, we do it earlier to cope with dynamic variable assignments in Python that require reasoning about control-flow. + +Motivation +^^^^^^^^^^ + +We want to be permissive about changing the type of a variable; for example, the following should be fine: + +.. code-block:: python + + x = 1 + use_int(x) + if cond: + x = 1.0 + use_float(x) + else: + use_int(x) + +On the other hand, we need to reject programs if the type is not unique: + +..
code-block:: python + + if cond: + x = 1 + else: + x = 1.0 + use(x) # Error: x could be int or float + +To achieve this, we run type checking on a CFG instead of the pure AST. + +CFG Builder +^^^^^^^^^^^ + +Control-flow graphs are defined in the :mod:`guppylang.cfg` module. +They are made up of basic blocks :class:`.BB` whose statements are restricted to :data:`.BBStatement` AST nodes. +In particular, this excludes control-flow statements like conditionals or loops. + +We construct CFGs using the :class:`.CFGBuilder` AST visitor. +This visitor also turns control-flow expressions like short-circuiting boolean logic or ``if`` expressions into explicit control-flow. +Furthermore, it does some light desugaring for list comprehensions, ``py(...)`` expressions, and walrus expressions (i.e. ``x := expr``). + + +2. Name Analysis +---------------- + +After the CFG is constructed, we start with the checking. +The entry point for this is the :func:`check_cfg` function. +However, before looking at types, we first do a name analysis pass that checks if variables are definitely assigned before they are used. +For example, the following code should be rejected: + +.. code-block:: python + + if cond() or (y := foo()): + return y # Error: y not defined if `cond()` is true + return y # y is defined here + +This check is done using standard dataflow analysis on the CFG. +We begin by collecting which variables are defined and used in each basic block. +This is done by the :class:`.VariableVisitor`, annotating each BB with a :class:`.VariableStats`. +Our dataflow analysis framework is implemented in the :mod:`.cfg.analysis` module and is triggered via the :meth:`.CFG.analyze` method. + + +3. Type Checking +---------------- + +After we have checked that the names used in a BB are definitely assigned, we can start to type-check the BB. +By visiting the blocks in BFS order, we make sure that we have already determined the types of the variables used in a BB once we get to it. 
+We also make sure that the input types flowing into a BB match up across all its predecessors. +This way we detect programs where the types of variables differ between different control-flow branches. +The details of type checking and inference are explained in TODO. + +After type checking, every AST expression will be annotated with a type. +They can be queried via the :func:`.get_type` function. + +.. note:: + + In the future, it would be nice to statically enforce this with mypy, e.g. by defining something like a ``TypedAST``. + +Furthermore, type checking replaces some AST nodes with more fine-grained versions. +For example, :class:`ast.Name` nodes are turned into either a :class:`.GlobalName` for global variables or a :class:`.PlaceNode` for +local variables (see TODO for more on places). +The same applies to other custom nodes defined in the :mod:`.nodes` module. + + +4. Linearity Checking +--------------------- + +TODO + diff --git a/docs/dev-guide/checking/types.rst b/docs/dev-guide/checking/types.rst new file mode 100644 index 00000000..25d1e05c --- /dev/null +++ b/docs/dev-guide/checking/types.rst @@ -0,0 +1,123 @@ +Representing Types +================== + +The :mod:`guppylang.tys` module defines how the Guppy compiler represents types internally. + + +Kinds of Types +-------------- + +Guppy types are modelled as an algebraic data type, represented by the :data:`.Type` union. +It contains the following core types: + +* :class:`.NoneType` represents the unit type ``None``. +* :class:`.NumericType` represents the types ``nat``, ``int``, and ``float``. +* :class:`.TupleType` is the type of tuples ``tuple[T1, T2, ...]``. +* :class:`.OpaqueType` corresponds to types that are directly specified via a Hugr lowering. + Examples include ``list[T]``, ``array[T, n]``, and ``qubit``. + They are defined via a :class:`.OpaqueTypeDef`. +* :class:`.StructType` represents user-defined struct types (see below for details).
+* :class:`.FunctionType` represents (possibly generic) function types. +* :class:`.BoundTypeVar` is a type variable that is bound to a generic parameter. + For example, the ``T`` in the generic function type ``forall T. T -> T``. + They are identified by their de Bruijn index. +* :class:`.ExistentialTypeVar` represents variables that are used during type inference. + They stand for concrete types that have not been inferred yet and are identified by a globally unique id. + +All types inherit from the :class:`.TypeBase` abstract base class, which defines common behaviour of all types, +for example, querying whether a type is linear or lowering a type to Hugr. + + +Struct Types +------------ + +Consider a struct type like ``MyStruct[int]`` where ``MyStruct`` is defined as follows: + +.. code-block:: python + + @guppy.struct + class MyStruct(Generic[T]): + x: int + y: T + +The type ``MyStruct[int]`` is represented as an instance of :class:`.StructType`: + +.. code-block:: python + + StructType(defn: CheckedStructDef, args: Sequence[Argument]) + +Struct types are made up of two parts: + +* The :class:`.CheckedStructDef` identifies the struct but without concrete values for the generic parameters. + In the example above, this is the ``MyStruct`` part. +* The :data:`.Argument` sequence identifies the concrete instantiation for the generic parameters of the struct. + In the example above, this would be the ``[int]`` part. + See below for more details on what exactly an "argument" is. + +The benefit of splitting struct types up in this way is that it makes substitution and equality testing easier: +Turning a ``MyStruct[S]`` into a ``MyStruct[T]`` is very cheap. +Substituting the arguments deep into the struct fields would be a lot more costly. +This matches up with Guppy structs being `nominal types `_, i.e. +two struct types are equivalent if they have the same definition and their generic arguments are equivalent.
+ +Note that we use the same representation for all generic types in Guppy, i.e. +:class:`.TupleType`, :class:`.FunctionType`, :class:`.OpaqueType`, and :class:`.StructType`. +They all inherit from :class:`.ParametrizedTypeBase` and provide their generic arguments via the :attr:`.ParametrizedTypeBase.args` field. + + +Generic Arguments +----------------- + +Guppy supports two kinds of generic arguments, captured via the :data:`.Argument` union type. +A good example of this is the type ``array[int, 10]``: + +* ``int`` is a :class:`.TypeArg`, i.e. a generic argument of kind *type*. +* ``10`` is a :class:`.ConstArg`, i.e. a generic argument representing a compile-time constant value. + +Note that constant arguments don't need to be literals; they could also be generic constant variables +(for example, the ``n`` in the function ``def foo[n](xs: array[int, n]) -> None``). +See :data:`.Const` for what constitutes a valid constant value. + + +Generic Parameters and Variables +-------------------------------- + +Going back to the example struct + +.. code-block:: python + + @guppy.struct + class MyStruct(Generic[T]): + x: int + y: T + +we have now seen how the compiler represents the type arguments in a concrete type ``MyStruct[int]``. +However, we also need a way to represent the generic parameter ``T`` *within the definition* of ``MyStruct``. +We call these *generic parameters* and represent them via the :data:`.Parameter` union type. +The structure is similar to generic arguments: + +* :class:`.TypeParam` represents parameters of kind *type* that can be instantiated with a :class:`.TypeArg`. +* :class:`.ConstParam` represents constant value parameters that can be instantiated with a :class:`.ConstArg` of matching type. + +These parameters are stored within the struct definition in the :attr:`.CheckedStructDef.params` field.
+In the struct field types, generic type variables are represented via a :class:`.BoundTypeVar` that refers to the *index* of +the corresponding type parameter in the :attr:`.CheckedStructDef.params` sequence. +The same applies to generic const values, which are represented via a :class:`.BoundConstVar` pointing to the corresponding const parameter. + +Note that we also use :data:`.Parameter` to represent the generic parameters of function types. +For example, the function ``def foo[T, n](xs: array[T, n]) -> None`` has two parameters ``T`` and ``n``. +They are stored in the :attr:`.FunctionType.params` field. + + +Type Transformers +----------------- + +Many operations on types require recursing into the type tree and looking at all intermediate nodes or leaves. +To make this recursive traversal easier, we have implemented the :class:`Transformable` interface +for all objects that contain types or const values. + +By subclassing :class:`.Transformer` and implementing the :meth:`.Transformer.transform` method, we can do a custom traversal. +The ``transform`` method is going to be called for every type and const value in the type tree. +We can either return ``None`` to continue the recursive traversal, or return a new type to replace the old one in the type tree. +See :class:`.Substituter` and :class:`.Instantiator` for two examples of this pattern. + diff --git a/docs/dev-guide/index.rst b/docs/dev-guide/index.rst new file mode 100644 index 00000000..59dd0065 --- /dev/null +++ b/docs/dev-guide/index.rst @@ -0,0 +1,11 @@ +Compiler Development Guide +========================== + +This guide is designed to document how the Guppy compiler works and help new contributors get involved with Guppy. + +..
toctree:: + :maxdepth: 1 + + overview + checking + diff --git a/docs/dev-guide/overview.rst b/docs/dev-guide/overview.rst new file mode 100644 index 00000000..138bef36 --- /dev/null +++ b/docs/dev-guide/overview.rst @@ -0,0 +1,57 @@ +Overview +======== + +This chapter gives an overview of the overall compilation pipeline and how everything fits together. + + +What the ``@guppy`` decorator does +---------------------------------- + +Guppy programs are defined by decorating regular Python functions or classes with the ``@guppy`` decorator. +At this point, no compilation or checking is taking place yet, i.e. decorating is an infallible operation. +The only thing the decorator does is registering that an object needs to be handled later when the user triggers compilation. + +Internally, we represent these decorated objects as "raw definitions" (see TODO for more on definitions). +For example, a decorated function is stored as a :class:`.RawFunctionDef` that holds a reference to the Python function that was decorated. +Function declarations, structs, type variables -- essentially everything the user can define -- are handled in a similar way. + +All of these raw definitions are stored in a :class:`.GuppyModule`. +Modules are our main compilation units and by default, each Python file corresponds to a :class:`.GuppyModule`. +The ``@guppy`` decorator tries to find the Python file from which it was invoked and registers the raw definition with the corresponding module. +See TODO for more on Guppy's module system. + + +Compilation Pipeline +-------------------- + +There are various ways the user can trigger compilation of a module, but all paths lead to the :meth:`.GuppyModule.compile` method. +In this method, all registered raw definitions are processed in various stages. + +Parsing +^^^^^^^ + +We use Python's :mod:`inspect` module to look up the source for decorated objects. +Then, we use Python's :func:`ast.parse` to turn this source into an abstract syntax tree (AST). 
+See :func:`.parse_py_func` for an example of this in action. +Throughout the whole compilation pipeline, we mostly use the builtin AST representation provided by Python. +Any additional AST nodes we need are defined in the :mod:`.nodes` module. + +During parsing, we turn our raw definition into a "parsed definition". +For example, a :class:`.RawFunctionDef` is turned into a :class:`.ParsedFunctionDef` that can then be processed further. + +Checking +^^^^^^^^ + +Next, we do a variety of things like name analysis, type inference, type checking, and linearity checking. +These are described in detail in TODO. +The result of this phase is a checked definition, for example a :class:`.CheckedFunctionDef`. + +Lowering to HUGR +^^^^^^^^^^^^^^^^ + +Finally, we lower all definitions to our `HUGR intermediate representation `_. +Internally, we refer to this stage as "compiling" since this is the final stage done by the Guppy compiler. +For example, we turn our :class:`.CheckedFunctionDef` into a :class:`.CompiledFunctionDef`. +From here on, the IR is handed off to tools like `tket2 `_ for optimisation +and `hugr-llvm `_ for further lowering and machine-code generation. + diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 00000000..74e129bc --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,17 @@ +Guppy Compiler Development +========================== + +This page collects resources for Guppy compiler developers. + +.. note:: + This page is designed for contributors to the Guppy compiler, not users of the language. + See TODO for the language documentation. + + + +..
toctree:: + :maxdepth: 1 + + dev-guide/index + api-docs/index + diff --git a/guppylang/cfg/bb.py b/guppylang/cfg/bb.py index aea8b7a8..fc0a03cf 100644 --- a/guppylang/cfg/bb.py +++ b/guppylang/cfg/bb.py @@ -30,6 +30,7 @@ class VariableStats(Generic[VId]): used: dict[VId, AstNode] = field(default_factory=dict) +#: AST statements that are valid inside a basic block BBStatement = ( ast.Assign | ast.AugAssign diff --git a/guppylang/definition/struct.py b/guppylang/definition/struct.py index 521b5549..455b3490 100644 --- a/guppylang/definition/struct.py +++ b/guppylang/definition/struct.py @@ -189,6 +189,7 @@ class CheckedStructDef(TypeDef, CompiledDef): """A struct definition that has been fully checked.""" defined_at: ast.ClassDef + #: Generic parameters of this struct params: Sequence[Parameter] fields: Sequence[StructField] diff --git a/guppylang/tys/arg.py b/guppylang/tys/arg.py index d349f452..7d3396bf 100644 --- a/guppylang/tys/arg.py +++ b/guppylang/tys/arg.py @@ -13,12 +13,12 @@ from guppylang.tys.ty import Type -# We define the `Argument` type as a union of all `ArgumentBase` subclasses defined -# below. This models an algebraic data type and enables exhaustiveness checking in -# pattern matches etc. -# Note that this might become obsolete in case the `@sealed` decorator is added: -# * https://peps.python.org/pep-0622/#sealed-classes-as-algebraic-data-types -# * https://github.com/johnthagen/sealed-typing-pep +#: We define the `Argument` type as a union of all `ArgumentBase` subclasses defined +#: below. This models an algebraic data type and enables exhaustiveness checking in +#: pattern matches etc. 
+#: Note that this might become obsolete in case the `@sealed` decorator is added: +#: * https://peps.python.org/pep-0622/#sealed-classes-as-algebraic-data-types +#: * https://github.com/johnthagen/sealed-typing-pep Argument: TypeAlias = "TypeArg | ConstArg" diff --git a/guppylang/tys/param.py b/guppylang/tys/param.py index ceee77e6..ad4158b1 100644 --- a/guppylang/tys/param.py +++ b/guppylang/tys/param.py @@ -17,12 +17,12 @@ from guppylang.tys.ty import Type -# We define the `Parameter` type as a union of all `ParameterBase` subclasses defined -# below. This models an algebraic data type and enables exhaustiveness checking in -# pattern matches etc. -# Note that this might become obsolete in case the `@sealed` decorator is added: -# * https://peps.python.org/pep-0622/#sealed-classes-as-algebraic-data-types -# * https://github.com/johnthagen/sealed-typing-pep +#: We define the `Parameter` type as a union of all `ParameterBase` subclasses defined +#: below. This models an algebraic data type and enables exhaustiveness checking in +#: pattern matches etc. +#: Note that this might become obsolete in case the `@sealed` decorator is added: +#: * https://peps.python.org/pep-0622/#sealed-classes-as-algebraic-data-types +#: * https://github.com/johnthagen/sealed-typing-pep Parameter: TypeAlias = "TypeParam | ConstParam" diff --git a/guppylang/tys/ty.py b/guppylang/tys/ty.py index 976604c7..524b7169 100644 --- a/guppylang/tys/ty.py +++ b/guppylang/tys/ty.py @@ -86,6 +86,7 @@ class ParametrizedTypeBase(TypeBase, ABC): Note that all subclasses are expected to be immutable. """ + #: The generic arguments of this type args: Sequence[Argument] def __post_init__(self) -> None: @@ -324,6 +325,7 @@ class FunctionType(ParametrizedTypeBase): inputs: Sequence[FuncInput] output: "Type" + #: Generic parameters of the function. params: Sequence[Parameter] input_names: Sequence[str] | None