Skip to content

Commit

Permalink
Improved lexer error message.
Browse files Browse the repository at this point in the history
  • Loading branch information
ReubenHillyard committed Sep 27, 2023
1 parent fe5e972 commit ecd4cc6
Show file tree
Hide file tree
Showing 8 changed files with 182 additions and 38 deletions.
48 changes: 48 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ peg = "0.8.1"
inkwell = { version = "0.2.0", features = ["llvm16-0"] }
clap = { version = "4.4.4", features = ["derive"] }
tempfile = "3.8.0"
annotate-snippets = { version = "0.9.1", features = ["color"] }
88 changes: 72 additions & 16 deletions src/commands/main_command.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
//! Type and functions for providing the main command.

use crate::commands::OptLevel;
use crate::commands::Arguments;
use crate::commands::OptLevel;
use crate::ir_gen::values::captures::{Captures, Environment};
use crate::ir_gen::values::tags::structure::Structure;
use crate::ir_gen::values::tags::tag;
Expand All @@ -17,6 +17,9 @@ use crate::typing::environments::Definitions;
use crate::typing::evaluation::Evaluate;
use crate::typing::expression::TypedExpression;
use crate::typing::read_back::read_back_with_ctx_len;
use crate::utility::lines_and_offsets;
use annotate_snippets::display_list::{DisplayList, FormatOptions};
use annotate_snippets::snippet::{Annotation, AnnotationType, Slice, Snippet, SourceAnnotation};
use inkwell::context::Context;
use inkwell::module::Module;
use inkwell::passes::PassManager;
Expand All @@ -36,10 +39,7 @@ impl MainArguments {
/// Creates `self` from the command line arguments by reading source from the input path.
pub fn new(args: Arguments) -> Result<MainArguments, String> {
let source = read_to_string(&args.path).map_err(|e| e.to_string())?;
Ok(MainArguments {
source,
args,
})
Ok(MainArguments { source, args })
}

/// Runs the compiler on the given arguments.
Expand Down Expand Up @@ -74,12 +74,69 @@ impl MainArguments {

fn lex(&self) -> Result<Vec<Token>, String> {
let tokens: Vec<_> = lex(&self.source).collect();
let (tokens, errors): (Vec<_>, Vec<_>) = tokens.into_iter().partition_map(|t| match t {
Ok(token) => Either::Left(token),
Err(error) => Either::Right(error),
});
let (tokens, errors): (Vec<_>, Vec<_>) =
tokens
.into_iter()
.partition_map(|(token, span)| match token {
Ok(token) => Either::Left(token),
Err(()) => Either::Right(span),
});
if !errors.is_empty() {
return Err(format!("lexing errors: {:#?}", errors))
let slices = {
let mut errors = errors.into_iter().peekable();
let origin = Some(self.args.path.to_str().unwrap());
let fold = false;
lines_and_offsets(&self.source)
.enumerate()
.filter_map(|(line_number, (source, line_offset))| {
let line_start = line_number + 1;
while errors.peek().is_some_and(|err| err.start < line_offset) {
errors.next();
}
errors
.peek()
.iter()
.filter_map(|err| {
if err.start < line_offset + source.len() {
let annotation = SourceAnnotation {
range: (err.start - line_offset, err.end - line_offset),
label: "beginning here",
annotation_type: AnnotationType::Error,
};
Some(Slice {
source,
line_start,
origin,
annotations: vec![annotation],
fold,
})
} else {
None
}
})
.next()
})
.collect()
};
let snippet = Snippet {
title: Some(Annotation {
id: None,
label: Some("unrecognized token(s)"),
annotation_type: AnnotationType::Error,
}),
footer: vec![Annotation {
id: None,
label: Some("aborting due to previous error(s)"),
annotation_type: AnnotationType::Error,
}],
slices,
opt: FormatOptions {
color: true,
anonymized_line_numbers: false,
margin: None,
},
};
return Err(DisplayList::from(snippet).to_string());
}
verbose_println!(self.args, "lexed file");
Ok(tokens)
Expand Down Expand Up @@ -114,7 +171,9 @@ impl MainArguments {
}
};
if !defs.all_metas_defined() {
return Err(format!("could not deduce values for some meta-variables in `{name}`"));
return Err(format!(
"could not deduce values for some meta-variables in `{name}`"
));
}
let value = typed_expr.evaluate(defs.with_empty_env());
let type_expr = read_back_with_ctx_len(&defs, 0, value.get_type());
Expand Down Expand Up @@ -203,10 +262,7 @@ impl MainArguments {
.compiler_builder
.build_store(zero_i32_ptr, u32_type.const_zero());

let nat_main = main_fn
.compiler_builder
.compiler
.get_global_ptr("nat_main");
let nat_main = main_fn.compiler_builder.compiler.get_global_ptr("nat_main");
let Some(nat_main) = nat_main else {
return Err("missing `nat_main`".to_string())
};
Expand Down Expand Up @@ -318,7 +374,7 @@ impl MainArguments {
};
let status = clang_output.status;
if !status.success() {
return Err(format!("clang-16 failed with exit status {status}"))
return Err(format!("clang-16 failed with exit status {status}"));
}
verbose_println!(self.args, "compiled to machine code");

Expand Down
2 changes: 1 addition & 1 deletion src/ir_gen/values/functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ impl<'ctx> Compiler<'ctx> {
if ret_type.is_some() {
self.add_enum_attribute(function, AttributeLoc::Return, NoUndef);
}
if ret_type.map(BasicTypeEnum::is_pointer_type) != Some(true) {
if !ret_type.is_some_and(BasicTypeEnum::is_pointer_type) {
for nth in 0..function.count_params() {
self.add_enum_attribute(function, AttributeLoc::Param(nth), NoCapture);
}
Expand Down
57 changes: 41 additions & 16 deletions src/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,12 @@ mod token {
}
}

use logos::Span;
pub use token::Token;

/// Lexes a `&str` into an iterator of [`Result<Token, usize>`](Token).
/// Lexes a `&str` into an iterator of [`(Result<Token, ()>, Span)`](Token).
///
/// An `Ok` value is a lexed token, and an `Err` value is the index of a character that could not be
/// lexed into a token.
/// Yields pairs of a token or non-token, and the span at which they occurred in `source`.
///
/// Whitespace and comments are ignored.
///
Expand All @@ -52,19 +52,29 @@ pub use token::Token;
/// ```
/// # use Token::*;
/// #
/// assert!(lex(r"//comment").eq([]));
/// assert_eq!(lex(r"//comment").collect::<Vec<_>>(), []);
///
/// assert!(lex(r"a => a").eq([Ok(Identifier("a")), Ok(DoubleArrow), Ok(Identifier("a"))]));
/// assert_eq!(
/// lex(r"a => a").collect::<Vec<_>>(),
/// [
/// (Ok(Identifier("a")), 0..1),
/// (Ok(DoubleArrow), 2..4),
/// (Ok(Identifier("a")), 5..6)
/// ]
/// );
///
/// assert!(lex(r"Type % -> Type").eq([Ok(Type), Err(5), Ok(SingleArrow), Ok(Type)]));
/// assert_eq!(
/// lex(r"Type % -> Type").collect::<Vec<_>>(),
/// [
/// (Ok(Type), 0..4),
/// (Err(()), 5..6),
/// (Ok(SingleArrow), 7..9),
/// (Ok(Type), 10..14),
/// ]
/// );
/// ```
pub fn lex(src: &str) -> impl Iterator<Item = Result<Token, usize>> {
<Token as logos::Logos>::lexer(src)
.spanned()
.map(|(token, span)| match token {
Ok(token) => Ok(token),
Err(()) => Err(span.start),
})
/// Produces an iterator of `(Result<Token, ()>, Span)` pairs for `source`.
///
/// Each yielded pair carries either a recognised [`Token`] or `Err(())` for
/// input that could not be lexed, together with the byte span it occupies in
/// `source`. Whitespace and comments are skipped by the underlying lexer.
pub fn lex(source: &str) -> impl Iterator<Item = (Result<Token, ()>, Span)> {
    let lexer = <Token as logos::Logos>::lexer(source);
    lexer.spanned()
}

#[cfg(test)]
Expand All @@ -76,10 +86,25 @@ mod tests {
fn doctest() {
use Token::*;

assert!(lex(r"//comment").eq([]));
assert_eq!(lex(r"//comment").collect::<Vec<_>>(), []);

assert!(lex(r"a => a").eq([Ok(Identifier("a")), Ok(DoubleArrow), Ok(Identifier("a"))]));
assert_eq!(
lex(r"a => a").collect::<Vec<_>>(),
[
(Ok(Identifier("a")), 0..1),
(Ok(DoubleArrow), 2..4),
(Ok(Identifier("a")), 5..6)
]
);

assert!(lex(r"Type % -> Type").eq([Ok(Type), Err(5), Ok(SingleArrow), Ok(Type)]));
assert_eq!(
lex(r"Type % -> Type").collect::<Vec<_>>(),
[
(Ok(Type), 0..4),
(Err(()), 5..6),
(Ok(SingleArrow), 7..9),
(Ok(Type), 10..14),
]
);
}
}
3 changes: 2 additions & 1 deletion src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,17 @@
//! A dependently-typed programming language, aiming to support safe mutable state and a cubical
//! interpretation of univalence.

use std::process;
use clap::Parser;
use commands::main_command::MainArguments;
use commands::Arguments;
use std::process;

pub mod commands;
pub mod ir_gen;
pub mod lexer;
pub mod parser;
pub mod typing;
pub mod utility;

/// Parses command line arguments and runs the compiler on them.
pub fn main() {
Expand Down
11 changes: 7 additions & 4 deletions src/typing/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,13 @@ use itertools::{Either, Itertools};

pub fn parse_expr(source: &str) -> Option<Expression> {
let tokens: Vec<_> = lex(source).collect();
let (tokens, errors): (Vec<_>, Vec<_>) = tokens.into_iter().partition_map(|t| match t {
Ok(token) => Either::Left(token),
Err(error) => Either::Right(error),
});
let (tokens, errors): (Vec<_>, Vec<_>) =
tokens
.into_iter()
.partition_map(|(token, span)| match token {
Ok(token) => Either::Left(token),
Err(()) => Either::Right(span),
});
if !errors.is_empty() {
return None;
};
Expand Down
10 changes: 10 additions & 0 deletions src/utility.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
//! Utility functions that arguably belong in a more specific module or upstream library.

/// Returns an iterator over each line of `source` paired with the byte offset
/// at which that line begins within `source`.
///
/// Lines are produced by [`str::lines`], so line terminators (`\n` or `\r\n`)
/// are not included in the yielded line slices, and its behaviour is otherwise
/// identical to that function.
pub fn lines_and_offsets(source: &str) -> impl Iterator<Item = (&str, usize)> {
    // Base address of the whole string; each line is a sub-slice of `source`,
    // so subtracting pointers yields the line's byte offset.
    let base = source.as_ptr() as usize;
    source
        .lines()
        .map(move |line| (line, line.as_ptr() as usize - base))
}

0 comments on commit ecd4cc6

Please sign in to comment.