Source code for ebnf_compiler.scanner
# SPDX-FileCopyrightText: 2026 Filipe Casimiro Ferreira <pro.maiscommentz@gmail.com>
#
# SPDX-License-Identifier: MIT
"""
EBNF Scanner
"""
import typing
from dataclasses import dataclass
from pathlib import Path
import typer
from loguru import logger
from rich import print
from rich.console import Console
from .tokens import Token
# Module-level Rich console; Scanner.print_error writes scan errors through it.
console = Console()
[docs]
@dataclass
class Scanner:
    """Character-by-character scanner that splits EBNF source text into tokens.

    Attach the scanner to its input with :meth:`open` (a file path) or
    :meth:`init` (an already open text stream), then call
    :meth:`get_next_symbol` repeatedly.  After each call the token kind is in
    :attr:`sym` and its text in :attr:`value`.
    """

    eof: bool = False  # True once the input stream is exhausted
    sym: Token | None = None  # Next Symbol
    value: str = ""  # text of the most recently scanned symbol
    _ch: str = ""  # look-ahead character; "" once end of input is reached
    _file_name: Path | None = None  # set by open(); only used in error messages
    _text: typing.TextIO | None = None  # stream being scanned
    _text_line: str = ""  # unread remainder of the current line
    _line_no: int = 0  # 1-based number of the line last read
    _col_no: int = 0  # 1-based column of _ch within that line

    # Single-character symbols and the token each one produces.
    token_map: typing.ClassVar[dict[str, Token]] = {
        "=": Token.EQL,
        "(": Token.LPAREN,
        ")": Token.RPAREN,
        "[": Token.LBRAK,
        "]": Token.RBRAK,
        "{": Token.LBRACE,
        "}": Token.RBRACE,
        "|": Token.BAR,
        ".": Token.PERIOD,
    }

    def init(self, f: typing.TextIO) -> None:
        """Attach the scanner to *f* and load the first look-ahead character.

        Read state is reset first so that an instance which already scanned
        another stream (and possibly reached end of input) starts cleanly.
        """
        self._text = f
        self.eof = False
        self._ch = ""
        self._text_line = ""
        self._line_no = 0
        self._col_no = 0
        self.get_next_char()

    def open(self, file_name: Path) -> None:
        """Open *file_name* for reading and initialise the scanner on it.

        Raises:
            typer.Exit: with code 1 when the file does not exist or cannot be
                opened/read; a message is printed first.
        """
        logger.debug(f"Opening {file_name}")
        self._file_name = file_name
        try:
            self.init(file_name.open("r"))
        except FileNotFoundError:
            print(f"[bold red]Error: Source file '{file_name}' not found[/bold red]")
            raise typer.Exit(code=1) from None
        except (OSError, UnicodeError) as err:
            # Permission problems, directories, undecodable bytes, ...: the
            # file exists, so do not claim that it was "not found".
            logger.debug(f"Reading {file_name} failed: {err!r}")
            print(
                f"[bold red]Error: Source file '{file_name}' "
                f"could not be read[/bold red]"
            )
            raise typer.Exit(code=1) from None

    def print_error(self, msg: str) -> None:
        """Print *msg* followed by the current file name, line and column."""
        console.print(
            f"Error: {msg} "
            f"(File {self._file_name}, Line {self._line_no}, Column {self._col_no})"
        )

    def skip_space(self) -> None:
        """Advance the look-ahead past any run of whitespace characters."""
        while self._ch.isspace():
            self.get_next_char()

    def get_next_char(self) -> None:
        """Move the look-ahead character ``_ch`` one position forward.

        Sets ``eof`` and leaves ``_ch`` as ``""`` once the stream is exhausted.

        Raises:
            RuntimeError: if no stream has been attached via init()/open().
        """
        if self._text is None:
            raise RuntimeError("Scanner not initialized")
        if not self.eof and self._text_line == "":
            # readline() returns "" only at end of file.  The line is kept
            # exactly as read: its trailing newline must stay visible as
            # whitespace, otherwise a token ending one line would run into
            # the token starting the next.
            self._text_line = self._text.readline()
            self._line_no += 1
            self._col_no = 0
            if self._text_line == "":
                self.eof = True
        if self.eof:
            self._ch = ""
            return
        self._ch = self._text_line[0]
        self._text_line = self._text_line[1:]
        self._col_no += 1

    def get_next_symbol(self) -> None:
        """Scan the next token into ``sym`` (kind) and ``value`` (text).

        Identifiers are runs of alphabetic characters; literals are delimited
        by double quotes and stored without the quotes; characters listed in
        ``token_map`` yield their mapped token; end of input yields
        ``Token.EOF``; anything else yields ``Token.OTHER``.  An unterminated
        literal is reported via print_error() and the method returns with the
        partial text in ``value``.
        """
        self.skip_space()
        if self._ch.isalpha():
            self.sym = Token.IDENT
            chars = [self._ch]
            self.get_next_char()
            while self._ch.isalpha():
                chars.append(self._ch)
                self.get_next_char()
            self.value = "".join(chars)
        elif self._ch == '"':
            self.sym = Token.LITERAL
            self.get_next_char()  # step over the opening quote
            chars = []
            while not self.eof and self._ch != '"':
                chars.append(self._ch)
                self.get_next_char()
            self.value = "".join(chars)
            if self.eof:
                self.print_error("Unterminated literal")
                return
            self.get_next_char()  # step over the closing quote
        elif self._ch == "":
            self.sym = Token.EOF
            self.value = ""
        elif self._ch in self.token_map:
            self.sym = self.token_map[self._ch]
            self.value = self._ch
            self.get_next_char()
        else:
            self.sym = Token.OTHER
            self.value = self._ch
            self.get_next_char()
        logger.info(f"Token: {self.sym}, Value: {self.value}")