From 46e1a1e2ce2807440e78c338e17d0ee5d6e2abe3 Mon Sep 17 00:00:00 2001
From: Lorenzo Torres
Date: Tue, 16 Dec 2025 14:21:41 +0100
Subject: [PATCH] first commit

---
 .gitignore     |   2 ++
 build.zig      | 156 ++++++++++++++++++++
 build.zig.zon  |  81 ++++++++++
 graph          |   5 ++
 src/Lexer.zig  | 132 +++++++++++++++++
 src/Parser.zig | 170 ++++++++++++++++++++++
 src/main.zig   |  42 ++++++
 src/root.zig   |   2 ++
 8 files changed, 590 insertions(+)

diff --git a/.gitignore b/.gitignore
new file mode 100644
--- /dev/null
+++ b/.gitignore
zig-out/
.zig-cache/

diff --git a/build.zig b/build.zig
new file mode 100644
--- /dev/null
+++ b/build.zig
const std = @import("std");

/// Build graph for the `al` package: a reusable module (src/root.zig),
/// a CLI executable (src/main.zig), a `run` step and a `test` step.
/// This function mutates the build graph `b`; the external build runner
/// executes (and parallelizes) the steps afterwards.
pub fn build(b: *std.Build) void {
    // Let the person running `zig build` pick the target and optimization
    // mode (`-Dtarget=...`, `-Doptimize=...`); defaults are native/Debug.
    const target = b.standardTargetOptions(.{});
    const optimize = b.standardOptimizeOption(.{});

    // Public module: what consumers import as `@import("al")`. Declarations
    // must be re-exported from src/root.zig to be visible to consumers.
    const mod = b.addModule("al", .{
        .root_source_file = b.path("src/root.zig"),
        // A target is required because this module is later used as the
        // root module of a test executable.
        .target = target,
    });

    // CLI executable. Its root module is private (createModule, so it is
    // not exposed to consumers of this package) and imports the public
    // module under the name "al". Target/optimize must be wired explicitly;
    // a fixed target could be hardcoded here if desirable (e.g. firmware).
    const exe = b.addExecutable(.{
        .name = "al",
        .root_module = b.createModule(.{
            .root_source_file = b.path("src/main.zig"),
            .target = target,
            .optimize = optimize,
            // Modules importable from source files of this root module.
            .imports = &.{
                // "al" is the name used at the `@import` site; renaming is
                // allowed and useful on collisions between packages.
                .{ .name = "al", .module = mod },
            },
        }),
    });

    // Install into the prefix (zig-out/ by default, override with --prefix)
    // when running the default `zig build` step.
    b.installArtifact(exe);

    // `zig build run [-- args]`: top-level step that runs the executable.
    const run_step = b.step("run", "Run the app");
    const run_cmd = b.addRunArtifact(exe);
    run_step.dependOn(&run_cmd.step);
    // Depend on install so the binary runs from zig-out, not the cache dir.
    run_cmd.step.dependOn(b.getInstallStep());
    // Forward `zig build run -- arg1 arg2` to the application.
    if (b.args) |args| {
        run_cmd.addArgs(args);
    }

    // Test executables — one per module, since a test executable only
    // tests a single module at a time. `mod` carries the relative target
    // field set above, which addTest requires.
    const mod_tests = b.addTest(.{
        .root_module = mod,
    });
    const run_mod_tests = b.addRunArtifact(mod_tests);

    const exe_tests = b.addTest(.{
        .root_module = exe.root_module,
    });
    const run_exe_tests = b.addRunArtifact(exe_tests);

    // `zig build test`: runs both test binaries. The two run steps do not
    // depend on each other, so the runner may execute them in parallel.
    const test_step = b.step("test", "Run tests");
    test_step.dependOn(&run_mod_tests.step);
    test_step.dependOn(&run_exe_tests.step);
}

diff --git a/build.zig.zon b/build.zig.zon
new file mode 100644
--- /dev/null
+++ b/build.zig.zon
.{
    // Default name used by packages depending on this one: when a user runs
    // `zig fetch --save <url>`, this field is the key in the `dependencies`
    // table. ("zig" is omitted from the name — it is already implied by the
    // Zig package namespace.)
    .name = .al,
    // This is a [Semantic Version](https://semver.org/).
    // In a future version of Zig it will be used for package deduplication.
    .version = "0.0.0",
    // Together with name, this represents a globally unique package
    // identifier. This field is generated by the Zig toolchain when the
    // package is first created, and then *never changes*. This allows
    // unambiguous detection of one package being an updated version of
    // another.
    //
    // When forking a Zig project, this id should be regenerated (delete the
    // field and run `zig build`) if the upstream project is still maintained.
    // Otherwise, the fork is *hostile*, attempting to take control over the
    // original project's identity. Thus it is recommended to leave the comment
    // on the following line intact, so that it shows up in code reviews that
    // modify the field.
    .fingerprint = 0x793b656ad110237c, // Changing this has security and trust implications.
    // Tracks the earliest Zig version that the package considers to be a
    // supported use case.
    .minimum_zig_version = "0.15.2",
    // This field is optional.
    // Each dependency must either provide a `url` and `hash`, or a `path`.
    // `zig build --fetch` can be used to fetch all dependencies of a package,
    // recursively. Once all dependencies are fetched, `zig build` no longer
    // requires internet connectivity.
    .dependencies = .{
        // See `zig fetch --save <url>` for a command-line interface for
        // adding dependencies.
        //.example = .{
        //    // When updating this field to a new URL, be sure to delete the
        //    // corresponding `hash`, otherwise you are communicating that you
        //    // expect to find the old hash at the new URL. If the contents of
        //    // a URL change this will result in a hash mismatch which will
        //    // prevent zig from using it.
        //    .url = "https://example.com/foo.tar.gz",
        //
        //    // This is computed from the file contents of the directory of
        //    // files that is obtained after fetching `url` and applying the
        //    // inclusion rules given by `paths`.
        //    //
        //    // This field is the source of truth; packages do not come from a
        //    // `url`; they come from a `hash`. `url` is just one of many
        //    // possible mirrors for how to obtain a package matching this
        //    // `hash`.
        //    //
        //    // Uses the [multihash](https://multiformats.io/multihash/) format.
        //    .hash = "...",
        //
        //    // When this is provided, the package is found in a directory
        //    // relative to the build root. In this case the package's hash is
        //    // irrelevant and therefore not computed. This field and `url`
        //    // are mutually exclusive.
        //    .path = "foo",
        //
        //    // When this is set to `true`, a package is declared to be lazily
        //    // fetched. This makes the dependency only get fetched if it is
        //    // actually used.
        //    .lazy = false,
        //},
    },
    // Specifies the set of files and directories that are included in this
    // package. Only files and directories listed here are included in the
    // `hash` that is computed for this package, and only they remain on disk
    // under the zig package manager. List files required for compilation plus
    // any license(s). Paths are relative to the build root; use the empty
    // string ("") to refer to the build root itself. A directory listed here
    // includes all files within, recursively.
    .paths = .{
        "build.zig",
        "build.zig.zon",
        "src",
        // For example...
+ //"LICENSE", + //"README.md", + }, +} diff --git a/graph b/graph new file mode 100755 index 0000000..5434453 --- /dev/null +++ b/graph @@ -0,0 +1,5 @@ +#!/bin/bash + +zig build run 2>&1 >/dev/null | dot -Tpdf -o graph.pdf +zathura graph.pdf +rm graph.pdf diff --git a/src/Lexer.zig b/src/Lexer.zig new file mode 100644 index 0000000..6e7e8aa --- /dev/null +++ b/src/Lexer.zig @@ -0,0 +1,132 @@ +const std = @import("std"); +const Lexer = @This(); + +index: usize, +source: []u8, +start: usize, + +pub const TokenType = enum { + plus, + minus, + star, + slash, + integer, + float, + identifier, + + eof, + illegal, +}; + +pub const Token = struct { + @"type": TokenType, + lexeme: []u8, +}; + +pub fn parseSpecial(lexer: *Lexer) Token { + var token_type: TokenType = .eof; + switch (lexer.source[lexer.index]) { + '+' => token_type = .plus, + '-' => token_type = .minus, + '*' => token_type = .star, + '/' => token_type = .slash, + else => token_type = .eof, + } + + lexer.index += 1; + + return .{ + .@"type" = token_type, + .lexeme = undefined, + }; +} + +pub fn next(lexer: *Lexer) Token { + lexer.skipWhitespaceAndComments(); + + if (lexer.index >= lexer.source.len) { + return lexer.makeToken(.eof); + } + + const c = lexer.source[lexer.index]; + + // Numbers + if (std.ascii.isDigit(c)) { + return lexer.number(); + } + + // Identifiers + if (std.ascii.isAlphabetic(c)) { + } + + // Single Character Tokens + lexer.index += 1; + switch (c) { + '+' => return lexer.makeToken(.plus), + '-' => return lexer.makeToken(.minus), + '*' => return lexer.makeToken(.star), + '/' => return lexer.makeToken(.slash), + else => return lexer.makeToken(.illegal), + } +} + +fn makeToken(lexer: *Lexer, @"type": TokenType) Token { + return .{ + .@"type" = @"type", + // Safely slice the source + .lexeme = if (lexer.index <= lexer.source.len) + lexer.source[lexer.start..lexer.index] + else + "", + }; +} + +fn skipWhitespaceAndComments(lexer: *Lexer) void { + while (lexer.index < lexer.source.len) { + const 
c = lexer.source[lexer.index]; + switch (c) { + // Whitespace + ' ', '\t', '\r', '\n' => { + lexer.index += 1; + }, + // Comments + '#' => { + while (lexer.index < lexer.source.len and lexer.source[lexer.index] != '\n') { + lexer.index += 1; + } + }, + else => { + lexer.start = lexer.index; + return; + } + } + } +} + +fn number(lexer: *Lexer) Token { + while (lexer.index < lexer.source.len and std.ascii.isDigit(lexer.source[lexer.index])) { + lexer.index += 1; + } + + if (lexer.index < lexer.source.len and lexer.source[lexer.index] == '.') { + if (lexer.index + 1 < lexer.source.len and std.ascii.isDigit(lexer.source[lexer.index + 1])) { + lexer.index += 1; // consume dot + while (lexer.index < lexer.source.len and std.ascii.isDigit(lexer.source[lexer.index])) { + lexer.index += 1; + } + return lexer.makeToken(.float); + } + } + + return lexer.makeToken(.integer); +} + +/// If `source` was allocated on the heap, +/// the caller must free it. +pub fn init(source: []u8) Lexer { + return .{ + .index = 0, + .source = source, + .start = 0, + }; +} diff --git a/src/Parser.zig b/src/Parser.zig new file mode 100644 index 0000000..75bd9bd --- /dev/null +++ b/src/Parser.zig @@ -0,0 +1,170 @@ +const std = @import("std"); +const Lexer = @import("Lexer.zig"); +const Parser = @This(); + +lexer: *Lexer, +allocator: std.mem.Allocator, +node_table: std.AutoHashMap(u64, *Node), +previous: Lexer.Token, +current: Lexer.Token, + +pub const NodeType = enum { + add, + sub, + mul, + div, + + integer, + float, + + start, + @"return", +}; + +pub const Node = struct { + @"type": NodeType, + id: u64, + inputs: std.ArrayList(*Node), + outputs: std.ArrayList(*Node), + data: extern union { + integer: u64, + float: f64, + }, + + pub fn init(parser: *Parser, @"type": NodeType) !*Node { + var node = try parser.allocator.create(Node); + node.@"type" = @"type"; + node.inputs = .{}; + node.outputs = .{}; + node.data = undefined; + return node; + } + + pub fn globalNumbering(node: *Node, parser: 
*Parser) !*Node {
        const node_hash = node.hash();
        node.id = node_hash;
        if (parser.node_table.get(node_hash)) |existing| {
            // An equivalent node already exists: discard the duplicate and
            // hand back the canonical one.
            // BUG FIX: also free the duplicate's edge lists, which leaked
            // before (operands are appended prior to numbering).
            node.inputs.deinit(parser.allocator);
            node.outputs.deinit(parser.allocator);
            parser.allocator.destroy(node);
            return existing;
        }

        try parser.node_table.put(node_hash, node);

        return node;
    }

    /// Structural hash: node type, literal payload, and input identities.
    pub fn hash(node: *Node) u64 {
        var hasher = std.hash.Wyhash.init(0);
        std.hash.autoHash(&hasher, node.@"type");

        switch (node.@"type") {
            .integer => std.hash.autoHash(&hasher, node.data.integer),
            .float => std.hash.autoHash(&hasher, @as(u64, @bitCast(node.data.float))),
            else => {},
        }

        // Inputs are hashed by pointer identity: they have already been
        // value-numbered, so equal subgraphs share a single pointer.
        for (node.inputs.items) |input| {
            std.hash.autoHash(&hasher, @intFromPtr(input));
        }

        return hasher.final();
    }

    /// Free this node. Input/output *nodes* are not freed (they may be
    /// shared); only this node's own storage is released.
    pub fn deinit(node: *Node, parser: *Parser) void {
        // BUG FIX: release the edge lists too; previously only the node
        // struct itself was freed, leaking both ArrayLists.
        node.inputs.deinit(parser.allocator);
        node.outputs.deinit(parser.allocator);
        parser.allocator.destroy(node);
    }
};

/// Consume the current token if it has the expected type.
pub fn match(parser: *Parser, expected: Lexer.TokenType) bool {
    if (parser.current.@"type" == expected) {
        parser.advance();
        return true;
    }
    return false;
}

/// Shift the token window: `previous` <- `current` <- next lexer token.
pub fn advance(parser: *Parser) void {
    parser.previous = parser.current;
    parser.current = parser.lexer.next();
}

/// factor := integer | float. Returns null when the current token cannot
/// start a factor.
pub fn buildFactor(parser: *Parser) !?*Node {
    const token = parser.current;
    switch (token.@"type") {
        .integer => {
            parser.advance();
            const node = try Node.init(parser, .integer);
            // The lexer only produces digit runs here; fall back to 0 on
            // parse failure rather than erroring (original behavior).
            node.data.integer = std.fmt.parseInt(u64, token.lexeme, 10) catch 0;
            return node.globalNumbering(parser);
        },
        .float => {
            parser.advance();
            const node = try Node.init(parser, .float);
            node.data.float = std.fmt.parseFloat(f64, token.lexeme) catch 0;
            return node.globalNumbering(parser);
        },
        else => {},
    }

    return null;
}

/// term := factor (('*' | '/') factor)*
pub fn buildTerm(parser: *Parser) !?*Node {
    var lhs = try parser.buildFactor();

    while (parser.match(.star) or parser.match(.slash)) {
        const node_type: NodeType = switch (parser.previous.@"type") {
            .star => .mul,
            .slash => .div,
            else => unreachable,
        };
        // BUG FIX: a missing operand around the operator now reports an
        // error instead of panicking on `.?`.
        const left = lhs orelse return error.UnexpectedToken;
        const right = (try parser.buildFactor()) orelse return error.UnexpectedToken;
        var node = try Node.init(parser, node_type);
        // BUG FIX: operands were appended right-before-left, which reverses
        // `a / b` for any consumer reading inputs[0] as the left operand.
        try node.inputs.append(parser.allocator, left);
        try node.inputs.append(parser.allocator, right);
        node = try node.globalNumbering(parser);
        lhs = node;
    }

    return lhs;
}

/// expression := term (('+' | '-') term)*
pub fn buildExpression(parser: *Parser) !?*Node {
    var lhs = try parser.buildTerm();

    while (parser.match(.plus) or parser.match(.minus)) {
        const node_type: NodeType = switch (parser.previous.@"type") {
            .plus => .add,
            .minus => .sub,
            else => unreachable,
        };
        const left = lhs orelse return error.UnexpectedToken;
        const right = (try parser.buildTerm()) orelse return error.UnexpectedToken;
        var node = try Node.init(parser, node_type);
        // Keep inputs in (left, right) order — significant for `sub`.
        try node.inputs.append(parser.allocator, left);
        try node.inputs.append(parser.allocator, right);
        node = try node.globalNumbering(parser);
        lhs = node;
    }

    return lhs;
}

/// Parse the whole input; returns the graph root, or null for empty input.
pub fn buildGraph(parser: *Parser) !?*Node {
    return try buildExpression(parser);
}

pub fn init(allocator: std.mem.Allocator, lexer: *Lexer) Parser {
    var parser: Parser = .{
        .lexer = lexer,
        .allocator = allocator,
        .node_table = std.AutoHashMap(u64, *Node).init(allocator),
        .previous = undefined,
        .current = undefined,
    };

    // Prime `current` so the first match/buildFactor sees a real token.
    parser.advance();

    return parser;
}

/// Frees the GVN table only. NOTE(review): the nodes it references are not
/// freed here — the demo knowingly leaks them (see main.zig).
pub fn deinit(parser: *Parser) void {
    parser.node_table.deinit();
}

diff --git a/src/main.zig b/src/main.zig
new file mode 100644
--- /dev/null
+++ b/src/main.zig
const std = @import("std");
const al = @import("al");

/// Print a dot node declaration for `node` on stderr.
pub fn nodeName(node: *al.Parser.Node) void {
    switch (node.@"type") {
        .start => std.debug.print("{d} [label=\"start\",fillcolor=yellow, color=black, shape=box]", .{node.id}),
        .add => std.debug.print("{d} [label=\"+\"]", .{node.id}),
        .sub => std.debug.print("{d} [label=\"-\"]", .{node.id}),
        .mul => std.debug.print("{d} [label=\"*\"]", .{node.id}),
        .div => std.debug.print("{d} [label=\"/\"]", .{node.id}),
        .integer => std.debug.print("{d} [label=\"{d}\"]", .{ node.id, node.data.integer }),
        .float => std.debug.print("{d} [label=\"{d}\"]", .{ node.id, node.data.float }),
        // .start/.return carry no printable payload beyond the cases above.
        else => {},
    }
    std.debug.print("\n", .{});
}

/// Recursively print dot edges for the graph rooted at `node`.
/// NOTE(review): shared (value-numbered) nodes are visited once per use, so
/// duplicate declarations/edges may be emitted; dot tolerates this.
pub fn printGraph(node: *al.Parser.Node) void {
    for
(node.inputs.items) |n| {
        nodeName(n);
        std.debug.print("{d}->{d}\n", .{ node.id, n.id });
        printGraph(n);
    }
}

/// Parse a hard-coded expression and emit its graph as dot source on stderr
/// (the `graph` script pipes stderr into `dot`).
pub fn main() !void {
    var gpa = std.heap.DebugAllocator(.{}).init;
    defer {
        // NOTE(review): leak reporting stays disabled on purpose — the
        // `graph` script pipes stderr straight into `dot`, and leak traces
        // would corrupt the dot source. The demo knowingly leaks its nodes.
        //_ = gpa.detectLeaks();
    }
    const allocator = gpa.allocator();

    var lexer = al.Lexer.init(@constCast("3*2+2.2"));
    var parser = al.Parser.init(allocator, &lexer);
    defer parser.deinit();

    const graph = try parser.buildGraph();
    // BUG FIX: unwrap once, up front. The old `defer graph.?.deinit(...)`
    // would panic at scope exit whenever the input parsed to nothing.
    const root = graph orelse return;
    defer root.deinit(&parser);

    std.debug.print("digraph G {{\n", .{});
    nodeName(root);
    printGraph(root);
    std.debug.print("}}\n", .{});
}

diff --git a/src/root.zig b/src/root.zig
new file mode 100644
--- /dev/null
+++ b/src/root.zig
//! Public surface of the `al` module: re-export the lexer and parser.
pub const Lexer = @import("Lexer.zig");
pub const Parser = @import("Parser.zig");