Kaleidoscope Abstract Syntax Tree
As with many language parsing systems Kaleidoscope leverages an Abstract Syntax Tree (AST) to simplify generating code from the parsed language. Each type of node in the tree implements the IAstNode interface
This interface provides the basic properties of any node in the tree for common uses. The Kaleidoscope language is a simple one and, therefore, has only a few kinds of nodes. The AST consist of the following basic categories of nodes:
AST Node
The IAstNode interface forms the common interface for all AST nodes, it provides the common properties for all nodes.
// -----------------------------------------------------------------------
// <copyright file="IAstNode.cs" company="Ubiquity.NET Contributors">
// Copyright (c) Ubiquity.NET Contributors. All rights reserved.
// </copyright>
// -----------------------------------------------------------------------
using System.Collections.Generic;
using OpenSoftware.DgmlTools.Model;
using Ubiquity.ArgValidators;
namespace Kaleidoscope.Grammar.AST
{
/// <summary>Root interface for nodes in the Abstract Syntax Tree</summary>
public interface IAstNode
{
/// <summary>Gets the source location covering the original source for the node</summary>
SourceSpan Location { get; }
/// <summary>Gets a collection of children for the node</summary>
IEnumerable<IAstNode> Children { get; }
/// <summary>Visitor pattern support for implementations to dispatch the concrete node type to a visitor</summary>
/// <typeparam name="TResult">Result type for the visitor</typeparam>
/// <param name="visitor">Visitor to dispatch the concrete type to</param>
/// <returns>Result of visiting this node</returns>
TResult? Accept<TResult>( IAstVisitor<TResult> visitor )
where TResult : class;
}
/// <summary>Extensions for IAstNode</summary>
/// <remarks>
/// While default interface methods seems like a great idea, it's not yet complete enough to be useful.
/// In particular there's pretty much no debugger support for evaluating such things, leaving you
/// with no way to see what they produce when used as a property. Hopefully, that will be resolved in
/// the future - but for now it is more a hindrance than it is a help.
/// </remarks>
public static class AstNodeExtensions
{
/// <summary>Gets the complete collection of errors for this node and children</summary>
/// <param name="node">Node to traverse for errors</param>
/// <remarks>Traverses the node hierarchy to find all error node at any depth</remarks>
/// <returns>Collection of errors found</returns>
public static IReadOnlyCollection<ErrorNode> CollectErrors( [ValidatedNotNull] this IAstNode node )
{
node.ValidateNotNull( nameof( node ) );
var collector = new ErrorNodeCollector();
node.Accept<string>( collector );
return collector.Errors;
}
public static DirectedGraph CreateGraph( [ValidatedNotNull] this IAstNode node )
{
node.ValidateNotNull( nameof( node ) );
var generator = new AstGraphGenerator();
node.Accept( generator );
return generator.Graph;
}
}
}
Function Definition
FunctionDefinition, as the name implies, contains the definition of a function. This includes the signature and the full body of the function.
// -----------------------------------------------------------------------
// <copyright file="FunctionDefinition.cs" company="Ubiquity.NET Contributors">
// Copyright (c) Ubiquity.NET Contributors. All rights reserved.
// </copyright>
// -----------------------------------------------------------------------
using System.Collections.Generic;
using System.Collections.Immutable;
using Ubiquity.ArgValidators;
namespace Kaleidoscope.Grammar.AST
{
/// <summary>AST type for a function definition</summary>
/// <remarks>
/// This supports construction of anonymous functions
/// that do not have an explicit prototype in the source language
/// </remarks>
public class FunctionDefinition
: IAstNode
{
public FunctionDefinition( SourceSpan location
, Prototype signature
, IExpression body
, bool isAnonymous = false
)
: this( location, signature, body, ImmutableArray.Create<LocalVariableDeclaration>( ), isAnonymous )
{
}
public FunctionDefinition( SourceSpan location
, Prototype signature
, IExpression body
, IImmutableList<LocalVariableDeclaration> localVariables
, bool isAnonymous = false
)
{
Signature = signature;
Body = body;
IsAnonymous = isAnonymous;
Location = location;
LocalVariables = localVariables;
}
public SourceSpan Location { get; }
/// <summary>Gets the prototype signature for the function</summary>
public Prototype Signature { get; }
/// <summary>Gets the body of the function as an expression tree</summary>
public IExpression Body { get; }
/// <summary>Gets a value indicating whether this function is an anonymous top level expression</summary>
/// <remarks>This is useful during generation as anonymous expressions are discardable once they are generated</remarks>
public bool IsAnonymous { get; }
public string Name => Signature.Name;
public IReadOnlyList<ParameterDeclaration> Parameters => Signature.Parameters;
public IReadOnlyList<LocalVariableDeclaration> LocalVariables { get; }
public TResult? Accept<TResult>( IAstVisitor<TResult> visitor )
where TResult : class
{
return visitor.ValidateNotNull( nameof( visitor ) ).Visit( this );
}
public IEnumerable<IAstNode> Children
{
get
{
yield return Signature;
yield return Body;
}
}
public override string ToString( )
{
return $"{Signature}{{{Body}}}";
}
}
}
Function Declaration
// -----------------------------------------------------------------------
// <copyright file="ProtoType.cs" company="Ubiquity.NET Contributors">
// Copyright (c) Ubiquity.NET Contributors. All rights reserved.
// </copyright>
// -----------------------------------------------------------------------
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Linq;
using System.Text;
using Ubiquity.ArgValidators;
namespace Kaleidoscope.Grammar.AST
{
/// <summary>Encapsulates data describing a function signature</summary>
/// <remarks>
/// This is used to enable consistent representation when the prototype
/// is synthesized during code generation (i.e. Anonymous expressions)
/// </remarks>
public class Prototype
: IAstNode
{
/// <summary>Initializes a new instance of the <see cref="Prototype"/> class.</summary>
/// <param name="location">Source location covering the complete signature</param>
/// <param name="name"><see cref="Name"/> containing the name of the function</param>
/// <param name="parameters">Collection of <see cref="Name"/>s for the names of each parameter</param>
public Prototype( SourceSpan location, string name, params ParameterDeclaration[ ] parameters )
: this( location, name, false, false, parameters )
{
}
/// <summary>Initializes a new instance of the <see cref="Prototype"/> class.</summary>
/// <param name="location">Source location covering the complete signature</param>
/// <param name="name"><see cref="Name"/> containing the name of the function</param>
/// <param name="parameters">Collection of <see cref="Name"/>s for the names of each parameter</param>
public Prototype( SourceSpan location, string name, IEnumerable<ParameterDeclaration> parameters )
: this( location, name, false, false, parameters )
{
}
/// <summary>Initializes a new instance of the <see cref="Prototype"/> class.</summary>
/// <param name="name">Name of the function</param>
/// <param name="parameters">Names of each parameter</param>
/// <remarks>
/// This version of the constructor is used to create synthetic prototypes that don't
/// exist within the original source.
/// </remarks>
public Prototype( string name, params string[ ] parameters )
: this( default, name, false, false, parameters.Select( ( n, i ) => new ParameterDeclaration( default, name, i ) ) )
{
}
/// <summary>Initializes a new instance of the <see cref="Prototype"/> class.</summary>
/// <param name="location">Source location covering the complete signature</param>
/// <param name="name">name of the function</param>
/// <param name="isCompilerGenerated">Indicates if this is a compiler generated prototype</param>
public Prototype( SourceSpan location, string name, bool isCompilerGenerated )
: this( location, name, isCompilerGenerated, false, Enumerable.Empty<ParameterDeclaration>( ) )
{
}
/// <summary>Initializes a new instance of the <see cref="Prototype"/> class.</summary>
/// <param name="location">Source location covering the complete signature</param>
/// <param name="name">name of the function</param>
/// <param name="isCompilerGenerated">Indicates if this is a compiler generated prototype</param>
/// <param name="isExtern">Indicates if this is an external prototype</param>
/// <param name="parameters">names of each parameter</param>
public Prototype( SourceSpan location, string name, bool isCompilerGenerated, bool isExtern, IEnumerable<ParameterDeclaration> parameters )
{
Location = location;
Name = name;
Parameters = parameters.ToImmutableArray( );
IsCompilerGenerated = isCompilerGenerated;
IsExtern = isExtern;
}
/// <inheritdoc/>
public SourceSpan Location { get; }
/// <summary>Gets the name of the function</summary>
public string Name { get; }
/// <summary>Gets a value indicating whether the function prototype is an extern declaration</summary>
public bool IsExtern { get; }
/// <summary>Gets a value indicating whether the function prototype is generated internally by compiler</summary>
public bool IsCompilerGenerated { get; }
/// <summary>Gets the parameters for the function</summary>
public IReadOnlyList<ParameterDeclaration> Parameters { get; }
/// <inheritdoc/>
public TResult? Accept<TResult>( IAstVisitor<TResult> visitor )
where TResult : class
{
return visitor.ValidateNotNull( nameof( visitor ) ).Visit( this );
}
/// <inheritdoc/>
public IEnumerable<IAstNode> Children => Parameters;
/// <inheritdoc/>
public override string ToString( )
{
var bldr = new StringBuilder( );
if( IsExtern )
{
bldr.Append( "[extern]" );
}
if( IsCompilerGenerated )
{
bldr.Append( "[CompilerGenerated]" );
}
bldr.Append( Name );
bldr.Append( '(' );
if( Parameters.Count > 0 )
{
bldr.Append( string.Join( ", ", Parameters.Select( p => p.ToString( ) ) ) );
}
bldr.Append( ')' );
return bldr.ToString( );
}
}
}
Variable Declaration
IVariableDeclaration is implemented by local variable declarations and parameter declarations. The interface abstracts the differences between the two types of variable declarations for most common cases. Most code generation or AST consumers don't care about the differences (i.e. Parameters have an index but locals don't)
// -----------------------------------------------------------------------
// <copyright file="IVariableDeclaration.cs" company="Ubiquity.NET Contributors">
// Copyright (c) Ubiquity.NET Contributors. All rights reserved.
// </copyright>
// -----------------------------------------------------------------------
namespace Kaleidoscope.Grammar.AST
{
public interface IVariableDeclaration
: IAstNode
{
string Name { get; }
bool CompilerGenerated { get; }
}
}
Local Variable
// -----------------------------------------------------------------------
// <copyright file="LocalVariableDeclaration.cs" company="Ubiquity.NET Contributors">
// Copyright (c) Ubiquity.NET Contributors. All rights reserved.
// </copyright>
// -----------------------------------------------------------------------
using System.Collections.Generic;
using System.Text;
using Ubiquity.ArgValidators;
namespace Kaleidoscope.Grammar.AST
{
public class LocalVariableDeclaration
: IVariableDeclaration
{
public LocalVariableDeclaration( SourceSpan location, string name, IExpression? initializer, bool compilerGenerated = false )
{
Location = location;
Name = name;
Initializer = initializer;
CompilerGenerated = compilerGenerated;
}
public SourceSpan Location { get; }
public string Name { get; }
public IExpression? Initializer { get; }
public bool CompilerGenerated { get; }
public TResult? Accept<TResult>( IAstVisitor<TResult> visitor )
where TResult : class
{
return visitor.ValidateNotNull( nameof( visitor ) ).Visit( this );
}
public IEnumerable<IAstNode> Children
{
get
{
if( Initializer != null )
{
yield return Initializer;
}
}
}
public override string ToString( )
{
var bldr = new StringBuilder();
if( CompilerGenerated )
{
bldr.Append( "[CompilerGenerated]" );
}
bldr.Append( "Declare(" );
bldr.Append( Name );
if( Initializer != null )
{
bldr.Append( ", " );
bldr.Append( Initializer );
}
bldr.Append( ')' );
return bldr.ToString( );
}
}
}
Function Parameter
// -----------------------------------------------------------------------
// <copyright file="ParameterDeclaration.cs" company="Ubiquity.NET Contributors">
// Copyright (c) Ubiquity.NET Contributors. All rights reserved.
// </copyright>
// -----------------------------------------------------------------------
using System.Collections.Generic;
using System.Linq;
using Ubiquity.ArgValidators;
namespace Kaleidoscope.Grammar.AST
{
public class ParameterDeclaration
: IVariableDeclaration
{
public ParameterDeclaration( SourceSpan location, string name, int index )
{
Location = location;
Name = name;
Index = index;
}
public SourceSpan Location { get; }
public string Name { get; }
public int Index { get; }
public bool CompilerGenerated => false;
public TResult? Accept<TResult>( IAstVisitor<TResult> visitor )
where TResult : class
{
return visitor.ValidateNotNull( nameof( visitor ) ).Visit( this );
}
public IEnumerable<IAstNode> Children => Enumerable.Empty<IAstNode>( );
public override string ToString( )
{
return Name;
}
}
}
Expression
Kaleidoscope is a functional language, all expressions produce a value, even if it is always zero. There are no statements in the language. Expressions form the core of the language and the bulk of the AST.
The IExpression interface forms the common interface for all AST expression nodes
// -----------------------------------------------------------------------
// <copyright file="IExpression.cs" company="Ubiquity.NET Contributors">
// Copyright (c) Ubiquity.NET Contributors. All rights reserved.
// </copyright>
// -----------------------------------------------------------------------
namespace Kaleidoscope.Grammar.AST
{
/// <summary>This is a grouping interface for all AST nodes that are valid expressions</summary>
/// <remarks>
/// This is a grouping interface to allow other parts of the system to distinguish between
/// an arbitrary node and one that is ultimately an expression. This helps to ensure correctness
/// (e.g. a function declaration is not valid as an argument to an operator, only an expression is.
/// </remarks>
public interface IExpression
: IAstNode
{
}
}
While this is an empty interface, it serves to distinguish between AST nodes that are not expressions. Thus providing some type safety for consumers. (i.e. it makes no sense to have a prototype as the operand for a binary operator so only nodes that implement the IExpression tag interface are allowed) This isn't a common or generally recommended pattern for interfaces but makes sense here since some form of differentiation is needed.
Unary Operator
Unary operators are all user defined, so the AST simply represents them as a Function Definition. No additional node types are needed for unary operators in the AST.
Binary Operator
BinaryOperatorExpression covers the built-in operators, any user defined binary operators are transformed to a function declaration/definition
// -----------------------------------------------------------------------
// <copyright file="BinaryOperatorExpression.cs" company="Ubiquity.NET Contributors">
// Copyright (c) Ubiquity.NET Contributors. All rights reserved.
// </copyright>
// -----------------------------------------------------------------------
using System.Collections.Generic;
using Ubiquity.ArgValidators;
namespace Kaleidoscope.Grammar.AST
{
public enum BuiltInOperatorKind
{
Invalid,
Assign,
Add,
Subtract,
Multiply,
Divide,
Less,
Pow
}
/// <summary>AST Expression node for a binary operator</summary>
public class BinaryOperatorExpression
: IExpression
{
/// <summary>Initializes a new instance of the <see cref="BinaryOperatorExpression"/> class.</summary>
/// <param name="location">Source location of the operator expression</param>
/// <param name="lhs">Left hand side expression for the operator</param>
/// <param name="op">Operator type</param>
/// <param name="rhs">Right hand side expression for the operator</param>
public BinaryOperatorExpression( SourceSpan location, IExpression lhs, BuiltInOperatorKind op, IExpression rhs )
{
Location = location;
Left = lhs;
Op = op;
Right = rhs;
}
/// <inheritdoc/>
public SourceSpan Location { get; }
/// <summary>Gets the left hand side expression</summary>
public IExpression Left { get; }
/// <summary>Gets the operator kind for this operator</summary>
public BuiltInOperatorKind Op { get; }
/// <summary>Gets the Operator name for this expression</summary>
public string Name => Op.ToString( );
/// <summary>Gets the Right hand side expression</summary>
public IExpression Right { get; }
/// <inheritdoc/>
public IEnumerable<IAstNode> Children
{
get
{
yield return Left;
yield return Right;
}
}
/// <inheritdoc/>
public virtual TResult? Accept<TResult>( IAstVisitor<TResult> visitor )
where TResult : class
{
return visitor.ValidateNotNull( nameof( visitor ) ).Visit( this );
}
public override string ToString( )
{
return $"{Name}({Left},{Right})";
}
}
}
The properties are fairly self explanatory, including the kind of operator and the left and right sides of the operator. The normal code generator pattern for the binary operators is:
- Generate code for the left side expression to a new value
- Generate code for the right side expression to a new value
- Apply the operator to the generated left and right values
- Return the result
Assignment
Assignment is a special kind of binary operator to represent "store" semantics for a variable. (e.g. mutable variables). Code generation for the assignment must handle the left side operand with a slightly different pattern. In particular, the left hand side is not an evaluated expression. Instead, it is the variable to assign the right hand value to. Thus, there isn't anything to evaluate for the left hand side as it is always a Variable Reference for the variable to assign the value to.
Function Call
Calls to functions (extern, user defined operators, or user defined functions) are represented in the AST as a FunctionCallExpression. The FunctionCallExpression contains the declaration of the function to call along with expressions for all of the arguments to the function.
// -----------------------------------------------------------------------
// <copyright file="FunctionCallExpression.cs" company="Ubiquity.NET Contributors">
// Copyright (c) Ubiquity.NET Contributors. All rights reserved.
// </copyright>
// -----------------------------------------------------------------------
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Linq;
using Ubiquity.ArgValidators;
namespace Kaleidoscope.Grammar.AST
{
public class FunctionCallExpression
: IExpression
{
public FunctionCallExpression( SourceSpan location, Prototype functionPrototype, IEnumerable<IExpression> args )
{
Location = location;
FunctionPrototype = functionPrototype;
Arguments = args.ToImmutableArray( );
}
public FunctionCallExpression( SourceSpan location, Prototype functionPrototype, params IExpression[ ] args )
: this( location, functionPrototype, ( IEnumerable<IExpression> )args )
{
}
public SourceSpan Location { get; }
public Prototype FunctionPrototype { get; }
public IReadOnlyList<IExpression> Arguments { get; }
public TResult? Accept<TResult>( IAstVisitor<TResult> visitor )
where TResult : class
{
return visitor.ValidateNotNull( nameof( visitor ) ).Visit( this );
}
public IEnumerable<IAstNode> Children
{
get
{
yield return FunctionPrototype;
}
}
public override string ToString( )
{
if( Arguments.Count == 0 )
{
return $"Call({FunctionPrototype})";
}
return $"Call({FunctionPrototype}, {string.Join( ",", Arguments.Select( a => a.ToString( ) ) )})";
}
}
}
Variable Reference
A variable reference is used to refer to a variable. In most cases this represents implicit "load" semantics for a variable. However, when used as the left hand side of an assignment operator, it has "store" semantics.
// -----------------------------------------------------------------------
// <copyright file="VariableReferenceExpression.cs" company="Ubiquity.NET Contributors">
// Copyright (c) Ubiquity.NET Contributors. All rights reserved.
// </copyright>
// -----------------------------------------------------------------------
using System.Collections.Generic;
using Ubiquity.ArgValidators;
namespace Kaleidoscope.Grammar.AST
{
public class VariableReferenceExpression
: IExpression
{
public VariableReferenceExpression( SourceSpan location, IVariableDeclaration declaration )
{
Location = location;
Declaration = declaration;
}
public SourceSpan Location { get; }
public IVariableDeclaration Declaration { get; }
public string Name => Declaration.Name;
public TResult? Accept<TResult>( IAstVisitor<TResult> visitor )
where TResult : class
{
return visitor.ValidateNotNull( nameof( visitor ) ).Visit( this );
}
public IEnumerable<IAstNode> Children
{
get
{
yield return Declaration;
}
}
public override string ToString( )
{
return $"Load({Name})";
}
}
}
Conditional
In Kaleidoscope conditional expressions follow the familiar if/then/else form, even though they are really more
like the ternary operator expression ( x ? y : z )
in C and related languages.
// -----------------------------------------------------------------------
// <copyright file="ConditionalExpression.cs" company="Ubiquity.NET Contributors">
// Copyright (c) Ubiquity.NET Contributors. All rights reserved.
// </copyright>
// -----------------------------------------------------------------------
using System.Collections.Generic;
using Ubiquity.ArgValidators;
namespace Kaleidoscope.Grammar.AST
{
public class ConditionalExpression
: IExpression
{
public ConditionalExpression( SourceSpan location
, IExpression condition
, IExpression thenExpression
, IExpression elseExpression
, LocalVariableDeclaration resultVar
)
{
Location = location;
Condition = condition;
ThenExpression = thenExpression;
ElseExpression = elseExpression;
ResultVariable = resultVar;
}
public SourceSpan Location { get; }
public IExpression Condition { get; }
public IExpression ThenExpression { get; }
public IExpression ElseExpression { get; }
// compiler generated result variable supports building conditional
// expressions without the need for SSA form by using mutable variables
// The result is assigned a value from both sides of the branch. In
// pure SSA form this isn't needed as a PHI node would be used instead.
public LocalVariableDeclaration ResultVariable { get; }
public TResult? Accept<TResult>( IAstVisitor<TResult> visitor )
where TResult : class
{
return visitor.ValidateNotNull( nameof( visitor ) ).Visit( this );
}
public IEnumerable<IAstNode> Children
{
get
{
yield return Condition;
yield return ThenExpression;
yield return ElseExpression;
}
}
public override string ToString( )
{
return $"Conditional({Condition}, {ThenExpression}, {ElseExpression})";
}
}
}
For-In
The for in expression is used to implement loops in Kaleidoscope.
// -----------------------------------------------------------------------
// <copyright file="ForInExpression.cs" company="Ubiquity.NET Contributors">
// Copyright (c) Ubiquity.NET Contributors. All rights reserved.
// </copyright>
// -----------------------------------------------------------------------
using System.Collections.Generic;
using Ubiquity.ArgValidators;
namespace Kaleidoscope.Grammar.AST
{
public class ForInExpression
: IExpression
{
public ForInExpression( SourceSpan location
, LocalVariableDeclaration loopVariable
, IExpression condition
, IExpression step
, IExpression body
)
{
Location = location;
LoopVariable = loopVariable;
Condition = condition;
Step = step;
Body = body;
}
public SourceSpan Location { get; }
public LocalVariableDeclaration LoopVariable { get; }
public IExpression Condition { get; }
public IExpression Step { get; }
public IExpression Body { get; }
public TResult? Accept<TResult>( IAstVisitor<TResult> visitor )
where TResult : class
{
return visitor.ValidateNotNull( nameof( visitor ) ).Visit( this );
}
public IEnumerable<IAstNode> Children
{
get
{
yield return LoopVariable;
yield return Condition;
yield return Step;
yield return Body;
}
}
public override string ToString( )
{
return $"for({LoopVariable}, {Condition}, {Step}, {Body})";
}
}
}
Var-In
Var-In Expression is used to provide, potentially nested, local scopes for variables
// -----------------------------------------------------------------------
// <copyright file="VarInExpression.cs" company="Ubiquity.NET Contributors">
// Copyright (c) Ubiquity.NET Contributors. All rights reserved.
// </copyright>
// -----------------------------------------------------------------------
using System.Collections.Generic;
using System.Text;
using Ubiquity.ArgValidators;
namespace Kaleidoscope.Grammar.AST
{
public class VarInExpression
: IExpression
{
public VarInExpression( SourceSpan location, IEnumerable<LocalVariableDeclaration> localVariables, IExpression body )
{
Location = location;
LocalVariables = localVariables;
Body = body;
}
public SourceSpan Location { get; }
public IEnumerable<LocalVariableDeclaration> LocalVariables { get; }
public IExpression Body { get; }
public TResult? Accept<TResult>( IAstVisitor<TResult> visitor )
where TResult : class
{
return visitor.ValidateNotNull( nameof( visitor ) ).Visit( this );
}
public IEnumerable<IAstNode> Children
{
get
{
foreach( var local in LocalVariables )
{
yield return local;
}
yield return Body;
}
}
public override string ToString( )
{
var bldr = new StringBuilder( "VarIn{" );
bldr.AppendJoin( ',', LocalVariables )
.Append( "}(" )
.Append( Body )
.Append( ')' );
return bldr.ToString( );
}
}
}