compiler_project_tc3002_b/
lib.rs

1//! # Lexical Analyzer
2//! #### Diego Eduardo Hernandez Cadena - A00834015
3//!  
//! Implements a lexical analyzer using the `plex` crate. It reads the specified input file, tokenizes its contents, and prints a list of the resulting tokens.
5//!
6//! ## Usage
7//! To use the lexical analyzer, set the `INPUT_FILE_PATH` constant to the desired location and run `cargo run` to execute the program.
8//!
9//! ## Adding more keywords and operators
10//! To add a keyword or operator:
11//! - Add it to the `Keyword` or `Operator` enum.
12//! - Add an entry to the corresponding `phf_map!` static map.
13//! - If necessary, update the regular expressions in the `lexer!` macro.
14//!
15//! ## Adding more tokens
16//! To add a new token:
17//! - Add a new variant to the `Token` enum.
18//! - Add a matching rule in the `lexer!` macro that maps input to the new token.
19
20use phf::phf_map;
21use plex::lexer;
22use std::{fs::File, io::Read};
23
/// Represents supported keywords that the lexer can recognize.
///
/// Keep the variants in sync with the `KEYWORDS` map.
// Fieldless enum: `Copy` is free, and `Eq`/`Hash` allow keywords to be
// used as map/set keys. All added derives are backward-compatible.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Keyword {
    While,
    For,
    Fn,
    If,
    Else,
}
33
/// Represents supported operators in the language.
///
/// Keep the variants in sync with the `OPERATORS` map and the operator
/// regular expression in the `lexer!` macro.
// Fieldless enum: `Copy` is free, and `Eq`/`Hash` allow operators to be
// used as map/set keys. All added derives are backward-compatible.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Operator {
    // Arithmetic
    Plus,
    Minus,
    Multiply,
    Divide,
    PlusEqual,
    MinusEqual,
    MultiplyEqual,
    DivideEqual,
    Modulo,
    Equal,

    // Comparison
    EqualEqual,
    NotEqual,
    Less,
    LessEqual,
    Greater,
    GreaterEqual,

    // Logical
    And,
    Or,
    Not,
}
62
63/// Represents all possible tokens that can be produced by the lexer
64#[allow(dead_code)]
65#[derive(Debug, PartialEq)]
66pub enum Token {
67    /// Integer literals (e.g. `42`)
68    Integer(i64),
69    /// Whitespace characters (e.g. space, tab, newline)
70    Whitespace,
71    /// Identifiers (e.g. variable or function names)
72    Identifier(String),
73    /// Floating point literals (e.g. `3.14`)
74    Decimal(f64),
75    /// Language keywords (e.g. `while`, `if`)
76    Keyword(Keyword),
77    /// Operators (e.g. `+`, `!=`)
78    Operator(Operator),
79}
80
/// Mapping of keyword strings to `Keyword` enum values
///
/// `phf_map!` builds a perfect-hash table at compile time, so lookups need
/// no runtime initialization. Entries must stay in sync with the `Keyword`
/// enum; any identifier not listed here lexes as `Token::Identifier`.
static KEYWORDS: phf::Map<&'static str, Keyword> = phf_map! {
    "while" => Keyword::While,
    "for" => Keyword::For,
    "fn" => Keyword::Fn,
    "if" => Keyword::If,
    "else" => Keyword::Else,
};
89
/// Mapping of operator strings to `Operator` enum values
///
/// `phf_map!` builds a perfect-hash table at compile time. Entries must stay
/// in sync with both the `Operator` enum and the operator regular expression
/// in the `lexer!` macro: a string matched by the regex but missing here
/// triggers the `panic!("Unknown operator: ...")` branch in the lexer.
static OPERATORS: phf::Map<&'static str, Operator> = phf_map! {
    // Arithmetic
    "+" => Operator::Plus,
    "+=" => Operator::PlusEqual,
    "-" => Operator::Minus,
    "-=" => Operator::MinusEqual,
    "*" => Operator::Multiply,
    "*=" => Operator::MultiplyEqual,
    "/" => Operator::Divide,
    "/=" => Operator::DivideEqual,
    "%" => Operator::Modulo,
    "=" => Operator::Equal,

    // Comparison
    "==" => Operator::EqualEqual,
    "!=" => Operator::NotEqual,
    "<" => Operator::Less,
    "<=" => Operator::LessEqual,
    ">" => Operator::Greater,
    ">=" => Operator::GreaterEqual,

    // Logical
    "&&" => Operator::And,
    "||" => Operator::Or,
    "!" => Operator::Not,
};
117
/// Reads the entire contents of the file at the specified path.
///
/// # Panics
/// Panics if the file cannot be opened or read; the message includes the
/// path and the underlying I/O error.
pub fn extract_file_contents(file_name: &str) -> String {
    // `fs::read_to_string` opens, reads and closes in one call, sizing the
    // buffer from file metadata — replaces the manual File::open + read loop.
    std::fs::read_to_string(file_name)
        .unwrap_or_else(|err| panic!("Failed to read file {file_name}: {err}"))
}
131
132/// Tries to match a string slice to a known `Keyword`
133pub fn parse_keyword(s: &str) -> Option<Keyword> {
134    KEYWORDS.get(s).cloned()
135}
136
137/// Tries to match a string slice to a known `Operator`
138pub fn parse_operator(s: &str) -> Option<Operator> {
139    OPERATORS.get(s).cloned()
140}
141
// Lexer definition that converts input strings into tokens.
//
// NOTE(review): `plex` documents longest-match semantics with earlier rules
// winning ties — confirm against the crate version in use. The single-char
// operator class below contains `\\` (a literal backslash), so a stray `\`
// in the input matches as an "operator", is absent from OPERATORS, and hits
// the panic branch — verify whether that is intentional.
lexer! {
    fn take_token(tok: 'a) -> Token;

    // Runs of spaces, newlines and tabs become a single Whitespace token.
    r"[ \n\t]+" => Token::Whitespace,
    // Decimal listed before integer so "3.14" is not split at the dot; the
    // optional leading '-' makes negative literals single tokens.
    r"-?[0-9]+\.[0-9]+" => Token::Decimal(tok.parse().unwrap()),
    r"-?[0-9]+" => Token::Integer(tok.parse().unwrap()),
    // Two-character operators are listed before the single-character class
    // so e.g. "+=" is not lexed as "+" followed by "=".
    r"\+=|-=|\*=|/=|==|!=|<=|>=|\&\&|\|\||[+\\\-*\/%<>!=]" => {
        if let Some(op) = parse_operator(tok) {
            Token::Operator(op)
        } else {
            // Reachable only if this regex and the OPERATORS map diverge.
            panic!("Unknown operator: {}", tok);
        }
    }
    // Identifier rule; keywords are the subset present in the KEYWORDS map.
    "[a-zA-Z_][a-zA-Z0-9_]*" => {
        if let Some(keyword) = parse_keyword(tok) {
            Token::Keyword(keyword)
        } else {
            Token::Identifier(String::from(tok))
        }
    }
}
164
165/// Extracts all tokens from the input string using the lexer
166///
167/// # Panics
168/// Panics if two non-whitespace tokens are found without a valid separator between them.
169pub fn extract_tokens(input: String) -> Vec<Token> {
170    let mut remaining = input.as_str();
171    let mut tokens: Vec<Token> = Vec::new();
172
173    while let Some((token, new_remaining)) = take_token(remaining) {
174        if let Some(prev_token) = tokens.last() {
175            if !matches!(prev_token, Token::Whitespace) && !matches!(token, Token::Whitespace) {
176                panic!(
177                    "Missing separator between tokens {:?} and {:?}",
178                    prev_token, token
179                )
180            }
181        }
182
183        tokens.push(token);
184        remaining = new_remaining;
185    }
186
187    if !remaining.trim().is_empty() {
188        let position = input.len() - remaining.len();
189        panic!(
190            "Unrecognized token starting at position {}: {:?}",
191            position, remaining
192        );
193    }
194
195    tokens
196}
197
198/// Main function: reads input, tokenizes it, and prints each token (excluding whitespace)
199pub fn run(input_file: &str) {
200    let s = extract_file_contents(input_file);
201    let tokens = extract_tokens(s);
202
203    for tok in tokens {
204        if matches!(tok, Token::Whitespace) {
205            continue;
206        }
207        println!("Token: {:?}", tok);
208    }
209}