Compare commits

...

3 Commits

Author SHA1 Message Date
1f83946607 feat: add images in README 2025-02-19 16:57:23 +08:00
58deabb023 fix: uninitialized property _reader in Lexer. 2025-02-19 16:29:33 +08:00
ad44a37b3c bump: update to dotnet 9.0 2025-02-19 16:19:11 +08:00
13 changed files with 750 additions and 737 deletions

View File

@ -2,11 +2,13 @@
<PropertyGroup> <PropertyGroup>
<OutputType>Exe</OutputType> <OutputType>Exe</OutputType>
<TargetFramework>net8.0</TargetFramework> <TargetFramework>net9.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings> <ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable> <Nullable>enable</Nullable>
<PublishAot>true</PublishAot> <PublishAot>true</PublishAot>
<InvariantGlobalization>true</InvariantGlobalization> <InvariantGlobalization>true</InvariantGlobalization>
<StaticExecutable>true</StaticExecutable>
<LinkerFlavor>lld</LinkerFlavor>
</PropertyGroup> </PropertyGroup>
<ItemGroup> <ItemGroup>

View File

@ -1,7 +1,7 @@
<Project Sdk="Microsoft.NET.Sdk"> <Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup> <PropertyGroup>
<TargetFramework>net8.0</TargetFramework> <TargetFramework>net9.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings> <ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable> <Nullable>enable</Nullable>
</PropertyGroup> </PropertyGroup>

View File

@ -1,673 +1,13 @@
using System.Text;
using Canon.Core.Abstractions; using Canon.Core.Abstractions;
using Canon.Core.Enums;
using Canon.Core.Exceptions;
namespace Canon.Core.LexicalParser; namespace Canon.Core.LexicalParser;
public class Lexer : ILexer public class Lexer : ILexer
{ {
// 记录token
private SemanticToken? _semanticToken;
private readonly StringBuilder _tokenBuilder = new();
private List<SemanticToken> _tokens = [];
// 状态机
private StateType _state = StateType.Start;
private char _ch;
private bool _finish;
// 文件读取
private ISourceReader _reader;
private uint _line = 1;
private uint _chPos;
public IEnumerable<SemanticToken> Tokenize(ISourceReader reader) public IEnumerable<SemanticToken> Tokenize(ISourceReader reader)
{ {
_reader = reader; LexerStateMachine machine = new(reader);
_tokens = [];
_state = StateType.Start;
while (_state != StateType.Done) return machine.Run();
{
switch (_state)
{
case StateType.Start:
HandleStartState();
break;
case StateType.Comment:
if (_ch == '{')
{
HandleCommentStateBig();
}
else if (_ch == '*')
{
HandleCommentStateSmall();
}
else
{
HandleCommentSingleLine();
}
break;
case StateType.Num:
HandleNumState();
break;
case StateType.Word:
HandleWordState();
break;
case StateType.Delimiter:
HandleDelimiterState();
break;
case StateType.Operator:
HandleOperatorState();
break;
case StateType.BreakPoint:
while (LexRules.IsBreakPoint(_ch))
{
GetChar();
}
Retract();
_state = StateType.Start;
break;
case StateType.Unknown:
throw new LexemeException(LexemeErrorType.UnknownCharacterOrString, _line, _chPos,
"Illegal lexeme.");
case StateType.Done:
break;
}
}
_tokens.Add(SemanticToken.End);
return _tokens;
}
private void HandleStartState()
{
// 初始化
ResetTokenBuilder();
// 读取首个字符
GetChar();
if (_finish)
{
_state = StateType.Done;
return;
}
// 根据首个字符判断可能的情况
if (_ch == '{') // 以 “{” 开头,为注释
{
_state = StateType.Comment;
}
else if (_ch == '(')
{
char nextChar = PeekNextChar();
if (nextChar == '*')
{
GetChar();
_state = StateType.Comment;
}
else
{
_state = StateType.Delimiter;
}
}
else if (_ch == '/')
{
char nextChar = PeekNextChar();
if (nextChar == '/')
{
GetChar();
_state = StateType.Comment;
}
else
{
_state = StateType.Operator;
}
}
else if (_ch == '.') // 以 “.” 开头,可能是数字或分隔符
{
char next = PeekNextChar();
if (next is >= '0' and <= '9')
{
_state = StateType.Num;
}
else
{
_state = StateType.Delimiter;
}
}
else if (LexRules.IsLetter(_ch)) // 以字母开头,为关键字或标识符
{
_state = StateType.Word;
}
else if (LexRules.IsDigit(_ch) || _ch == '$') // 以数字或 “$” 开头,为数值
{
_state = StateType.Num;
}
else if (LexRules.IsDelimiter(_ch)) // 为分隔符
{
_state = StateType.Delimiter;
}
else if (LexRules.IsOperator(_ch)) // 为运算符
{
_state = StateType.Operator;
}
else if (LexRules.IsBreakPoint(_ch))
{
_state = StateType.BreakPoint;
}
else
{
_state = StateType.Unknown;
}
}
private void HandleCommentStateBig()
{
while (_ch != '}')
{
GetChar();
if (_finish)
{
throw new LexemeException(LexemeErrorType.UnclosedComment, _line, _chPos,
"The comment is not closed.");
}
}
_state = StateType.Start;
}
private void HandleCommentStateSmall()
{
bool commentClosed = false;
while (!commentClosed)
{
GetChar();
while (_ch != '*')
{
GetChar();
if (_finish)
{
throw new LexemeException(LexemeErrorType.UnclosedComment, _line, _chPos,
"The comment is not closed.");
}
}
GetChar();
if (_finish)
{
throw new LexemeException(LexemeErrorType.UnclosedComment, _line, _chPos,
"The comment is not closed.");
}
if (_ch == ')') commentClosed = true;
}
_state = StateType.Start;
}
private void HandleCommentSingleLine()
{
while (_ch != '\n')
{
GetChar();
}
_state = StateType.Start;
}
private void HandleWordState()
{
while (LexRules.IsDigit(_ch) || LexRules.IsLetter(_ch))
{
Cat();
GetChar();
}
Retract();
string tokenString = GetCurrentTokenString();
if (LexRules.GetKeywordTypeByKeywprd(tokenString, out KeywordType keywordType))
{
_semanticToken = LexemeFactory.MakeToken(keywordType, tokenString, _line, _chPos);
}
else
{
_semanticToken = LexemeFactory.MakeToken(SemanticTokenType.Identifier, tokenString, _line, _chPos);
}
AddToTokens(_semanticToken);
_state = StateType.Start;
}
private void HandleNumState()
{
NumberType numberType = NumberType.Integer;
// 十六进制
if (_ch == '$')
{
ProcessHex();
numberType = NumberType.Hex;
}
// 非十六进制
else if (LexRules.IsDigit(_ch) || _ch == '.')
{
while (!NumberShouldBreak())
{
// 含小数部分
if (_ch == '.')
{
// 检查是否是符号 “..”
char next = PeekNextChar();
if (next == '.')
{
Retract();
_state = StateType.Delimiter;
break;
}
// 不是符号 “..”,进入小数点后的判断
Cat(); // 记录“.”
// “.”后不应为空,至少应该有一位小数
GetChar();
if (NumberShouldBreak())
{
throw new LexemeException(LexemeErrorType.IllegalNumberFormat, _line, _chPos,
"Illegal numbers!");
}
// 读取小数点后的数字
while (!NumberShouldBreak())
{
if (LexRules.IsDigit(_ch))
{
Cat();
GetChar();
}
else if (_ch == 'e' || _ch == 'E')
{
ProcessE();
break;
}
else if (NumberShouldBreak())
{
break;
}
else
{
throw new LexemeException(LexemeErrorType.IllegalNumberFormat, _line, _chPos,
"Illegal number.");
}
}
numberType = NumberType.Real;
break;
}
// 不含小数部分,含科学计数法
if (_ch == 'e' || _ch == 'E')
{
ProcessE();
numberType = NumberType.Real;
break;
}
// 暂时为整数
if (LexRules.IsDigit(_ch))
{
Cat();
GetChar();
}
else if (NumberShouldBreak())
{
numberType = NumberType.Integer;
break;
}
else
{
throw new LexemeException(LexemeErrorType.IllegalNumberFormat, _line, _chPos, "Illegal number.");
}
}
}
_semanticToken = LexemeFactory.MakeToken(numberType, GetCurrentTokenString(),
_line, _chPos);
AddToTokens(_semanticToken);
_state = StateType.Start;
}
private void ProcessHex()
{
Cat();
GetChar();
while (!NumberShouldBreak())
{
// 假设IsHexDigit方法能够识别十六进制数字
if (LexRules.IsHexDigit(_ch))
{
Cat();
GetChar();
}
else if (NumberShouldBreak())
{
break;
}
else
{
throw new LexemeException(LexemeErrorType.IllegalNumberFormat, _line, _chPos,
"Illegal hex numbers!");
}
}
}
private void ProcessE()
{
Cat();
GetChar();
if (LexRules.IsDigit(_ch) || _ch == '+' || _ch == '-')
{
Cat();
}
else
{
throw new LexemeException(LexemeErrorType.IllegalNumberFormat, _line, _chPos, "Illegal number.");
}
// 读取e后的数字
GetChar();
while (!NumberShouldBreak())
{
if (LexRules.IsDigit(_ch))
{
Cat();
GetChar();
}
else
{
throw new LexemeException(LexemeErrorType.IllegalNumberFormat, _line, _chPos, "Illegal number.");
}
}
}
bool NumberShouldBreak()
{
if (_ch == ' ' || _ch == '\n' || _ch == '\t' || _ch == '\r' || (LexRules.IsDelimiter(_ch) && _ch != '.') ||
LexRules.IsOperator(_ch) || _finish)
{
Retract();
return true;
}
return false;
}
private bool IsDot()
{
if (_tokens.Count != 0)
{
SemanticToken tokenBefore = _tokens.Last();
if (tokenBefore.TokenType == SemanticTokenType.Identifier) return true;
}
return false;
}
private void HandleDelimiterState()
{
Cat();
switch (_ch)
{
case '.':
{
GetChar();
if (_ch == '.')
{
Cat();
_semanticToken = LexemeFactory.MakeToken(DelimiterType.DoubleDots, "..", _line, _chPos);
break;
}
Retract();
if (IsDot())
{
_semanticToken = LexemeFactory.MakeToken(DelimiterType.Dot, ".", _line, _chPos);
}
else
{
_semanticToken = LexemeFactory.MakeToken(DelimiterType.Period, ".", _line, _chPos);
}
}
break;
case '\'':
{
// 重置_token准备收集字符串内容
ResetTokenBuilder();
GetChar(); // 移动到下一个字符,即字符串的第一个字符
while (_ch != '\'' && _ch != '\"')
{
Cat(); // 收集字符
GetChar(); // 移动到下一个字符
if (_ch == '\n' || _finish)
{
throw new LexemeException(LexemeErrorType.UnclosedStringLiteral, _line, _chPos,
"The String is not closed.");
}
}
string currentString = GetCurrentTokenString();
if (currentString.Length > 1)
{
_semanticToken = LexemeFactory.MakeToken(SemanticTokenType.String,
currentString, _line, _chPos);
}
else
{
_semanticToken = LexemeFactory.MakeToken(SemanticTokenType.Character,
currentString, _line, _chPos);
}
ResetTokenBuilder();
if (!(_ch == '\'' || _ch == '\"'))
{
throw new LexemeException(LexemeErrorType.UnclosedStringLiteral, _line, _chPos,
"The String is not closed.");
}
}
break;
case ',':
_semanticToken = LexemeFactory.MakeToken(DelimiterType.Comma, ",", _line, _chPos);
break;
case ':':
char nextChar = PeekNextChar();
if (nextChar == '=')
{
GetChar();
Cat();
_semanticToken = LexemeFactory.MakeToken(OperatorType.Assign, ":=", _line, _chPos);
}
else
{
_semanticToken = LexemeFactory.MakeToken(DelimiterType.Colon, ":", _line, _chPos);
}
break;
case ';':
_semanticToken = LexemeFactory.MakeToken(DelimiterType.Semicolon, ";", _line, _chPos);
break;
case '(':
_semanticToken = LexemeFactory.MakeToken(DelimiterType.LeftParenthesis, "(", _line, _chPos);
break;
case ')':
_semanticToken = LexemeFactory.MakeToken(DelimiterType.RightParenthesis, ")", _line, _chPos);
break;
case '[':
_semanticToken = LexemeFactory.MakeToken(DelimiterType.LeftSquareBracket, "[", _line, _chPos);
break;
case ']':
_semanticToken = LexemeFactory.MakeToken(DelimiterType.RightSquareBracket, "]", _line, _chPos);
break;
}
if (_semanticToken is null)
{
throw new InvalidOperationException();
}
_tokens.Add(_semanticToken);
_state = StateType.Start;
}
private void HandleOperatorState()
{
switch (_ch)
{
case '+': // 识别 +
Cat();
_semanticToken = LexemeFactory.MakeToken(OperatorType.Plus, "+", _line, _chPos);
AddToTokens(_semanticToken);
break;
case '-': // 识别 -
Cat();
_semanticToken = LexemeFactory.MakeToken(OperatorType.Minus, "-", _line, _chPos);
AddToTokens(_semanticToken);
break;
case '*': // 识别 *
Cat();
_semanticToken = LexemeFactory.MakeToken(OperatorType.Multiply, "*", _line, _chPos);
AddToTokens(_semanticToken);
break;
case '/': // 识别 /
Cat();
_semanticToken = LexemeFactory.MakeToken(OperatorType.Divide, "/", _line, _chPos);
AddToTokens(_semanticToken);
break;
case '=':
Cat();
_semanticToken = LexemeFactory.MakeToken(OperatorType.Equal, "=", _line, _chPos);
AddToTokens(_semanticToken);
break;
case '<':
Cat();
GetChar();
if (_ch == '=')
{
// 识别 <=
Cat();
_semanticToken = LexemeFactory.MakeToken(OperatorType.LessEqual, "<=", _line, _chPos);
AddToTokens(_semanticToken);
}
else if (_ch == '>')
{
// 识别 <>
Cat();
_semanticToken = LexemeFactory.MakeToken(OperatorType.NotEqual, ">", _line, _chPos);
AddToTokens(_semanticToken);
}
else
{
// 识别 <
Retract();
_semanticToken = LexemeFactory.MakeToken(OperatorType.Less, "<", _line, _chPos);
AddToTokens(_semanticToken);
}
break;
case '>':
Cat();
GetChar();
if (_ch == '=')
{
// 识别 >=
Cat();
_semanticToken = LexemeFactory.MakeToken(OperatorType.GreaterEqual, ">=", _line, _chPos);
AddToTokens(_semanticToken);
}
else
{
// 识别 >
Retract();
_semanticToken = LexemeFactory.MakeToken(OperatorType.Greater, ">", _line, _chPos);
AddToTokens(_semanticToken);
}
break;
default:
throw new LexemeException(LexemeErrorType.UnknownCharacterOrString, _line, _chPos, "Illegal lexeme.");
}
_state = StateType.Start;
}
private void AddToTokens(SemanticToken semanticToken)
{
_tokens.Add(semanticToken);
}
private void Cat()
{
_tokenBuilder.Append(_ch); // 使用StringBuilder追加字符
}
private string GetCurrentTokenString()
{
return _tokenBuilder.ToString(); // 从StringBuilder获取当前记号的字符串
}
private void ResetTokenBuilder()
{
_tokenBuilder.Clear(); // 清空StringBuilder以复用
}
private char PeekNextChar()
{
// 确认下一个位置是否仍在buffer的范围内
if (_reader.TryPeekChar(out char? c))
{
return c.Value;
}
else
{
return char.MinValue;
}
}
void GetChar()
{
if (_finish)
{
return;
}
_finish = !_reader.MoveNext();
if (_finish)
{
_ch = char.MinValue;
return;
}
_ch = _reader.Current;
_line = _reader.Line;
_chPos = _reader.Pos;
}
void Retract()
{
_reader.Retract();
} }
} }

View File

@ -0,0 +1,669 @@
using System.Text;
using Canon.Core.Abstractions;
using Canon.Core.Enums;
using Canon.Core.Exceptions;
namespace Canon.Core.LexicalParser;
public sealed class LexerStateMachine(ISourceReader reader)
{
// Token bookkeeping: the token most recently built, the text buffer for the lexeme
// currently being collected, and the list of tokens produced so far.
private SemanticToken? _semanticToken;
private readonly StringBuilder _tokenBuilder = new();
private readonly List<SemanticToken> _tokens = [];
// State machine: the current state, the character most recently read by GetChar,
// and a flag that is set once the reader reports no more input.
private StateType _state = StateType.Start;
private char _ch;
private bool _finish;
// Source position copied from the reader each time a character is read.
private uint _line = 1;
private uint _chPos;
/// <summary>
/// Drives the state machine until it reaches <see cref="StateType.Done"/> and returns the
/// collected tokens followed by <see cref="SemanticToken.End"/>.
/// </summary>
/// <returns>The tokens recognised so far, terminated by the end token.</returns>
/// <exception cref="LexemeException">Thrown when the machine enters the unknown state.</exception>
public IEnumerable<SemanticToken> Run()
{
    while (_state != StateType.Done)
    {
        switch (_state)
        {
            case StateType.Start:
                HandleStartState();
                break;

            case StateType.Comment:
                // The character that opened the comment selects the comment flavour.
                switch (_ch)
                {
                    case '{':
                        HandleCommentStateBig();
                        break;
                    case '*':
                        HandleCommentStateSmall();
                        break;
                    default:
                        HandleCommentSingleLine();
                        break;
                }

                break;

            case StateType.Num:
                HandleNumState();
                break;

            case StateType.Word:
                HandleWordState();
                break;

            case StateType.Delimiter:
                HandleDelimiterState();
                break;

            case StateType.Operator:
                HandleOperatorState();
                break;

            case StateType.BreakPoint:
                // Skip the whole run of break-point characters, then hand the first
                // character after the run back to the reader.
                while (LexRules.IsBreakPoint(_ch))
                {
                    GetChar();
                }

                Retract();
                _state = StateType.Start;
                break;

            case StateType.Unknown:
                throw new LexemeException(LexemeErrorType.UnknownCharacterOrString, _line, _chPos,
                    "Illegal lexeme.");
        }
    }

    _tokens.Add(SemanticToken.End);
    return _tokens;
}
/// <summary>
/// Starts a new lexeme: clears the text buffer, reads one character and chooses the next
/// state from that character (and, for "(", "/" and ".", from the character after it).
/// </summary>
private void HandleStartState()
{
    ResetTokenBuilder();
    GetChar();

    // Nothing left to read: stop the machine.
    if (_finish)
    {
        _state = StateType.Done;
        return;
    }

    switch (_ch)
    {
        case '{':
            // "{" opens a brace comment.
            _state = StateType.Comment;
            return;

        case '(':
            // "(*" opens a comment; a lone "(" is a delimiter.
            if (PeekNextChar() == '*')
            {
                GetChar();
                _state = StateType.Comment;
            }
            else
            {
                _state = StateType.Delimiter;
            }

            return;

        case '/':
            // "//" opens a single-line comment; a lone "/" is an operator.
            if (PeekNextChar() == '/')
            {
                GetChar();
                _state = StateType.Comment;
            }
            else
            {
                _state = StateType.Operator;
            }

            return;

        case '.':
            // "." followed by a digit starts a number, otherwise it is a delimiter.
            _state = PeekNextChar() is >= '0' and <= '9' ? StateType.Num : StateType.Delimiter;
            return;
    }

    // Remaining characters are classified by the lexical rules, in priority order.
    if (LexRules.IsLetter(_ch))
    {
        // A letter starts a keyword or an identifier.
        _state = StateType.Word;
    }
    else if (LexRules.IsDigit(_ch) || _ch == '$')
    {
        // A digit or "$" starts a numeric literal.
        _state = StateType.Num;
    }
    else if (LexRules.IsDelimiter(_ch))
    {
        _state = StateType.Delimiter;
    }
    else if (LexRules.IsOperator(_ch))
    {
        _state = StateType.Operator;
    }
    else if (LexRules.IsBreakPoint(_ch))
    {
        _state = StateType.BreakPoint;
    }
    else
    {
        _state = StateType.Unknown;
    }
}
/// <summary>
/// Skips a brace comment: reads characters until "}" is the current character, then
/// returns to the start state.
/// </summary>
/// <exception cref="LexemeException">Thrown when input ends before "}" is read.</exception>
private void HandleCommentStateBig()
{
    while (_ch is not '}')
    {
        GetChar();
        if (!_finish)
        {
            continue;
        }

        throw new LexemeException(LexemeErrorType.UnclosedComment, _line, _chPos,
            "The comment is not closed.");
    }

    _state = StateType.Start;
}
/// <summary>
/// Skips a "(* ... *)" comment and returns to the start state.
/// </summary>
/// <remarks>
/// On entry the current character is the "*" of the opening "(*". The comment ends at the
/// first ")" that directly follows a "*" read inside the comment body, so a run of stars
/// before the closing parenthesis (for example "**)") also closes the comment. The opening
/// star itself does not count, so "(*)" is still an open comment.
/// </remarks>
/// <exception cref="LexemeException">Thrown when input ends before "*)" is read.</exception>
private void HandleCommentStateSmall()
{
    // True when the character read just before the current one was a "*" inside the body.
    bool previousWasStar = false;

    while (true)
    {
        GetChar();
        if (_finish)
        {
            throw new LexemeException(LexemeErrorType.UnclosedComment, _line, _chPos,
                "The comment is not closed.");
        }

        if (previousWasStar && _ch == ')')
        {
            break;
        }

        // Re-evaluate on every character so that each "*" in a run can pair with ")".
        previousWasStar = _ch == '*';
    }

    _state = StateType.Start;
}
/// <summary>
/// Skips a "//" comment: reads characters up to and including the next line feed, or up
/// to the end of input, then returns to the start state.
/// </summary>
private void HandleCommentSingleLine()
{
    // GetChar does nothing once input is exhausted, so the loop must also stop on _finish;
    // otherwise a comment on the last line without a trailing newline never terminates.
    while (_ch != '\n' && !_finish)
    {
        GetChar();
    }

    _state = StateType.Start;
}
/// <summary>
/// Collects a run of letters and digits, emits it as a keyword token when the lexical
/// rules recognise it as a keyword and as an identifier token otherwise, then returns to
/// the start state.
/// </summary>
private void HandleWordState()
{
    while (LexRules.IsDigit(_ch) || LexRules.IsLetter(_ch))
    {
        Cat();
        GetChar();
    }

    // The character that ended the run belongs to the next lexeme.
    Retract();

    string word = GetCurrentTokenString();
    bool isKeyword = LexRules.GetKeywordTypeByKeywprd(word, out KeywordType keywordType);
    if (!isKeyword)
    {
        _semanticToken = LexemeFactory.MakeToken(SemanticTokenType.Identifier, word, _line, _chPos);
    }
    else
    {
        _semanticToken = LexemeFactory.MakeToken(keywordType, word, _line, _chPos);
    }

    AddToTokens(_semanticToken);
    _state = StateType.Start;
}
// Scans a numeric literal beginning at the current character, adds the resulting number
// token to the token list and returns to the start state.
// A leading "$" is handed to ProcessHex; otherwise digits are collected, with an optional
// fractional part and an optional exponent handled by ProcessE.
// Throws LexemeException (IllegalNumberFormat) when a character that is neither part of
// the number nor a terminator (see NumberShouldBreak) is found.
// Note: NumberShouldBreak retracts the reader whenever it returns true, so the order of
// the calls below matters.
private void HandleNumState()
{
NumberType numberType = NumberType.Integer;
// Hexadecimal literal
if (_ch == '$')
{
ProcessHex();
numberType = NumberType.Hex;
}
// Non-hexadecimal literal
else if (LexRules.IsDigit(_ch) || _ch == '.')
{
while (!NumberShouldBreak())
{
// Fractional part
if (_ch == '.')
{
// Check whether this is the ".." symbol rather than a decimal point
char next = PeekNextChar();
if (next == '.')
{
// Hand the "." back so it is read again after the digits collected so far are emitted.
// NOTE(review): the Delimiter state set here is overwritten by the assignment to
// StateType.Start at the end of this method.
Retract();
_state = StateType.Delimiter;
break;
}
// Not "..": continue with the digits after the decimal point
Cat(); // record the "."
// The "." must be followed by at least one fractional digit
GetChar();
if (NumberShouldBreak())
{
throw new LexemeException(LexemeErrorType.IllegalNumberFormat, _line, _chPos,
"Illegal numbers!");
}
// Read the digits after the decimal point
while (!NumberShouldBreak())
{
if (LexRules.IsDigit(_ch))
{
Cat();
GetChar();
}
else if (_ch == 'e' || _ch == 'E')
{
ProcessE();
break;
}
else if (NumberShouldBreak())
{
break;
}
else
{
throw new LexemeException(LexemeErrorType.IllegalNumberFormat, _line, _chPos,
"Illegal number.");
}
}
numberType = NumberType.Real;
break;
}
// No fractional part, but scientific notation
if (_ch == 'e' || _ch == 'E')
{
ProcessE();
numberType = NumberType.Real;
break;
}
// Still an integer so far
if (LexRules.IsDigit(_ch))
{
Cat();
GetChar();
}
else if (NumberShouldBreak())
{
numberType = NumberType.Integer;
break;
}
else
{
throw new LexemeException(LexemeErrorType.IllegalNumberFormat, _line, _chPos, "Illegal number.");
}
}
}
// Emit whatever text was collected as a number token of the detected kind.
_semanticToken = LexemeFactory.MakeToken(numberType, GetCurrentTokenString(),
_line, _chPos);
AddToTokens(_semanticToken);
_state = StateType.Start;
}
/// <summary>
/// Collects a hexadecimal literal: records the current character (the "$" prefix when
/// called from the number state) and then every following hexadecimal digit until
/// <see cref="NumberShouldBreak"/> reports a terminator.
/// </summary>
/// <exception cref="LexemeException">
/// Thrown when a character that is neither a hexadecimal digit nor a terminator is read.
/// </exception>
private void ProcessHex()
{
    Cat();
    GetChar();

    while (!NumberShouldBreak())
    {
        if (LexRules.IsHexDigit(_ch))
        {
            Cat();
            GetChar();
            continue;
        }

        // The loop condition has just established that the current character is not a
        // terminator, so a second terminator check here could never succeed.
        throw new LexemeException(LexemeErrorType.IllegalNumberFormat, _line, _chPos,
            "Illegal hex numbers!");
    }
}
/// <summary>
/// Collects the exponent part of a number: records the current character (the "e"/"E"
/// marker when called from the number state), then a digit or sign, then every following
/// digit until <see cref="NumberShouldBreak"/> reports a terminator.
/// </summary>
/// <exception cref="LexemeException">
/// Thrown when the marker is not followed by a digit, "+" or "-", or when a non-digit,
/// non-terminator character appears afterwards.
/// </exception>
private void ProcessE()
{
    Cat();
    GetChar();

    bool startsExponent = LexRules.IsDigit(_ch) || _ch == '+' || _ch == '-';
    if (!startsExponent)
    {
        throw new LexemeException(LexemeErrorType.IllegalNumberFormat, _line, _chPos, "Illegal number.");
    }

    Cat();

    // Read the digits that follow the marker and optional sign.
    GetChar();
    while (!NumberShouldBreak())
    {
        if (!LexRules.IsDigit(_ch))
        {
            throw new LexemeException(LexemeErrorType.IllegalNumberFormat, _line, _chPos, "Illegal number.");
        }

        Cat();
        GetChar();
    }
}
/// <summary>
/// Reports whether the current character terminates a numeric literal: whitespace, any
/// delimiter other than ".", any operator, or end of input.
/// </summary>
/// <remarks>
/// Has a side effect: when it returns <c>true</c> it also retracts the reader.
/// </remarks>
/// <returns><c>true</c> when the number ends at the current character.</returns>
bool NumberShouldBreak()
{
    bool terminates = (_ch is ' ' or '\n' or '\t' or '\r')
                      || (LexRules.IsDelimiter(_ch) && _ch != '.')
                      || LexRules.IsOperator(_ch)
                      || _finish;
    if (!terminates)
    {
        return false;
    }

    Retract();
    return true;
}
/// <summary>
/// Reports whether the most recently emitted token is an identifier; returns
/// <c>false</c> when no token has been emitted yet.
/// </summary>
private bool IsDot()
{
    if (_tokens.Count == 0)
    {
        return false;
    }

    return _tokens[^1].TokenType == SemanticTokenType.Identifier;
}
/// <summary>
/// Builds the token for the delimiter at the current character, adds it to the token list
/// and returns to the start state. Handles ".", "..", quoted literals, ",", ":", ":=",
/// ";", parentheses and square brackets.
/// </summary>
/// <exception cref="LexemeException">
/// Thrown when a quoted literal reaches a line feed or end of input before its closing quote.
/// </exception>
/// <exception cref="InvalidOperationException">
/// Thrown when the current character is not one of the delimiters handled here.
/// </exception>
private void HandleDelimiterState()
{
    // Forget the token built for the previous lexeme. Without this, the null check after
    // the switch could never fire and an unhandled delimiter would re-add the old token.
    _semanticToken = null;

    Cat();
    switch (_ch)
    {
        case '.':
        {
            GetChar();
            if (_ch == '.')
            {
                Cat();
                _semanticToken = LexemeFactory.MakeToken(DelimiterType.DoubleDots, "..", _line, _chPos);
                break;
            }

            // Single ".": give the look-ahead character back to the reader.
            Retract();
            if (IsDot())
            {
                _semanticToken = LexemeFactory.MakeToken(DelimiterType.Dot, ".", _line, _chPos);
            }
            else
            {
                _semanticToken = LexemeFactory.MakeToken(DelimiterType.Period, ".", _line, _chPos);
            }
        }
            break;
        case '\'':
        {
            // Drop the opening quote and collect only the literal's content.
            ResetTokenBuilder();
            GetChar(); // move to the first character of the literal

            // NOTE(review): a double quote also ends the literal, as in the original code.
            while (_ch != '\'' && _ch != '\"')
            {
                // Check before appending so that a line feed or end of input directly
                // after the opening quote is reported instead of being collected.
                if (_ch == '\n' || _finish)
                {
                    throw new LexemeException(LexemeErrorType.UnclosedStringLiteral, _line, _chPos,
                        "The String is not closed.");
                }

                Cat(); // collect the character
                GetChar(); // move to the next character
            }

            // The loop only exits on a closing quote, so the literal is known to be closed.
            string currentString = GetCurrentTokenString();
            if (currentString.Length > 1)
            {
                _semanticToken = LexemeFactory.MakeToken(SemanticTokenType.String,
                    currentString, _line, _chPos);
            }
            else
            {
                _semanticToken = LexemeFactory.MakeToken(SemanticTokenType.Character,
                    currentString, _line, _chPos);
            }

            ResetTokenBuilder();
        }
            break;
        case ',':
            _semanticToken = LexemeFactory.MakeToken(DelimiterType.Comma, ",", _line, _chPos);
            break;
        case ':':
            char nextChar = PeekNextChar();
            if (nextChar == '=')
            {
                GetChar();
                Cat();
                _semanticToken = LexemeFactory.MakeToken(OperatorType.Assign, ":=", _line, _chPos);
            }
            else
            {
                _semanticToken = LexemeFactory.MakeToken(DelimiterType.Colon, ":", _line, _chPos);
            }

            break;
        case ';':
            _semanticToken = LexemeFactory.MakeToken(DelimiterType.Semicolon, ";", _line, _chPos);
            break;
        case '(':
            _semanticToken = LexemeFactory.MakeToken(DelimiterType.LeftParenthesis, "(", _line, _chPos);
            break;
        case ')':
            _semanticToken = LexemeFactory.MakeToken(DelimiterType.RightParenthesis, ")", _line, _chPos);
            break;
        case '[':
            _semanticToken = LexemeFactory.MakeToken(DelimiterType.LeftSquareBracket, "[", _line, _chPos);
            break;
        case ']':
            _semanticToken = LexemeFactory.MakeToken(DelimiterType.RightSquareBracket, "]", _line, _chPos);
            break;
    }

    // No case matched: the state machine routed a character here that is not a known delimiter.
    if (_semanticToken is null)
    {
        throw new InvalidOperationException();
    }

    _tokens.Add(_semanticToken);
    _state = StateType.Start;
}
/// <summary>
/// Builds the token for the operator at the current character, adds it to the token list
/// and returns to the start state. Recognises "+", "-", "*", "/", "=", "&lt;", "&lt;=",
/// "&lt;&gt;", "&gt;" and "&gt;=".
/// </summary>
/// <exception cref="LexemeException">Thrown when the current character is not a handled operator.</exception>
private void HandleOperatorState()
{
    switch (_ch)
    {
        case '+': // recognise +
            Cat();
            _semanticToken = LexemeFactory.MakeToken(OperatorType.Plus, "+", _line, _chPos);
            AddToTokens(_semanticToken);
            break;
        case '-': // recognise -
            Cat();
            _semanticToken = LexemeFactory.MakeToken(OperatorType.Minus, "-", _line, _chPos);
            AddToTokens(_semanticToken);
            break;
        case '*': // recognise *
            Cat();
            _semanticToken = LexemeFactory.MakeToken(OperatorType.Multiply, "*", _line, _chPos);
            AddToTokens(_semanticToken);
            break;
        case '/': // recognise /
            Cat();
            _semanticToken = LexemeFactory.MakeToken(OperatorType.Divide, "/", _line, _chPos);
            AddToTokens(_semanticToken);
            break;
        case '=': // recognise =
            Cat();
            _semanticToken = LexemeFactory.MakeToken(OperatorType.Equal, "=", _line, _chPos);
            AddToTokens(_semanticToken);
            break;
        case '<':
            Cat();
            GetChar();
            if (_ch == '=')
            {
                // recognise <=
                Cat();
                _semanticToken = LexemeFactory.MakeToken(OperatorType.LessEqual, "<=", _line, _chPos);
                AddToTokens(_semanticToken);
            }
            else if (_ch == '>')
            {
                // recognise <> (the literal used to be ">", which lost the leading "<")
                Cat();
                _semanticToken = LexemeFactory.MakeToken(OperatorType.NotEqual, "<>", _line, _chPos);
                AddToTokens(_semanticToken);
            }
            else
            {
                // recognise <; the look-ahead character belongs to the next lexeme
                Retract();
                _semanticToken = LexemeFactory.MakeToken(OperatorType.Less, "<", _line, _chPos);
                AddToTokens(_semanticToken);
            }

            break;
        case '>':
            Cat();
            GetChar();
            if (_ch == '=')
            {
                // recognise >=
                Cat();
                _semanticToken = LexemeFactory.MakeToken(OperatorType.GreaterEqual, ">=", _line, _chPos);
                AddToTokens(_semanticToken);
            }
            else
            {
                // recognise >; the look-ahead character belongs to the next lexeme
                Retract();
                _semanticToken = LexemeFactory.MakeToken(OperatorType.Greater, ">", _line, _chPos);
                AddToTokens(_semanticToken);
            }

            break;
        default:
            throw new LexemeException(LexemeErrorType.UnknownCharacterOrString, _line, _chPos, "Illegal lexeme.");
    }

    _state = StateType.Start;
}
/// <summary>Appends <paramref name="semanticToken"/> to the list of produced tokens.</summary>
private void AddToTokens(SemanticToken semanticToken) => _tokens.Add(semanticToken);
/// <summary>Appends the current character to the lexeme text buffer.</summary>
private void Cat() => _tokenBuilder.Append(_ch);
/// <summary>Returns the text collected so far for the current lexeme.</summary>
private string GetCurrentTokenString() => _tokenBuilder.ToString();
/// <summary>Empties the lexeme text buffer so it can be reused for the next lexeme.</summary>
private void ResetTokenBuilder() => _tokenBuilder.Clear();
/// <summary>
/// Returns the character the reader would yield next without consuming it, or
/// <see cref="char.MinValue"/> when the reader cannot peek.
/// </summary>
private char PeekNextChar()
{
    return reader.TryPeekChar(out char? c) ? c.Value : char.MinValue;
}
/// <summary>
/// Advances the reader by one character and copies the character and its position into
/// the machine. When the reader has no more input, sets the finish flag and the current
/// character to <see cref="char.MinValue"/>; later calls then do nothing.
/// </summary>
private void GetChar()
{
    if (_finish)
    {
        return;
    }

    if (reader.MoveNext())
    {
        _ch = reader.Current;
        _line = reader.Line;
        _chPos = reader.Pos;
        return;
    }

    // End of input: remember it and expose a sentinel character.
    _finish = true;
    _ch = char.MinValue;
}
/// <summary>Asks the reader to step back so the current character is read again.</summary>
private void Retract() => reader.Retract();
}

View File

@ -2,7 +2,7 @@
<PropertyGroup> <PropertyGroup>
<OutputType>Exe</OutputType> <OutputType>Exe</OutputType>
<TargetFramework>net8.0</TargetFramework> <TargetFramework>net9.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings> <ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable> <Nullable>enable</Nullable>
</PropertyGroup> </PropertyGroup>

View File

@ -1,7 +1,7 @@
<Project Sdk="Microsoft.NET.Sdk.Web"> <Project Sdk="Microsoft.NET.Sdk.Web">
<PropertyGroup> <PropertyGroup>
<TargetFramework>net8.0</TargetFramework> <TargetFramework>net9.0</TargetFramework>
<Nullable>enable</Nullable> <Nullable>enable</Nullable>
<ImplicitUsings>enable</ImplicitUsings> <ImplicitUsings>enable</ImplicitUsings>
<SpaRoot>client-app</SpaRoot> <SpaRoot>client-app</SpaRoot>
@ -14,7 +14,7 @@
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<PackageReference Include="Microsoft.AspNetCore.OpenApi" Version="8.0.3" /> <PackageReference Include="Microsoft.AspNetCore.OpenApi" Version="9.0.*" />
<PackageReference Include="MongoDB.Driver.GridFS" Version="2.25.0" /> <PackageReference Include="MongoDB.Driver.GridFS" Version="2.25.0" />
<PackageReference Include="MongoDB.EntityFrameworkCore" Version="7.0.0-preview.1" /> <PackageReference Include="MongoDB.EntityFrameworkCore" Version="7.0.0-preview.1" />
<PackageReference Include="SkiaSharp" Version="2.88.8" /> <PackageReference Include="SkiaSharp" Version="2.88.8" />

View File

@ -1,6 +1,6 @@
FROM mcr.microsoft.com/dotnet/aspnet:8.0 FROM mcr.microsoft.com/dotnet/aspnet:9.0
RUN apt update RUN apt update
RUN apt install libfontconfig1 -y RUN apt install libfontconfig1 -y
WORKDIR /App WORKDIR /App
COPY bin/Release/net8.0/publish . COPY bin/Release/net9.0/publish .
ENTRYPOINT ["dotnet", "Canon.Server.dll"] ENTRYPOINT ["dotnet", "Canon.Server.dll"]

View File

@ -1,7 +1,7 @@
<Project Sdk="Microsoft.NET.Sdk"> <Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup> <PropertyGroup>
<TargetFramework>net8.0</TargetFramework> <TargetFramework>net9.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings> <ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable> <Nullable>enable</Nullable>

View File

@ -1,66 +0,0 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 17
VisualStudioVersion = 17.0.31903.59
MinimumVisualStudioVersion = 10.0.40219.1
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Canon.Core", "Canon.Core\Canon.Core.csproj", "{63EC6CDA-0BF2-4DC6-BEC1-5A3083130E89}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Canon.Console", "Canon.Console\Canon.Console.csproj", "{3D1C0BA2-57F2-41B2-B024-7A0E54A91DA0}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "scripts", "scripts", "{CA16F23D-8355-4956-B929-082F92CE0C21}"
ProjectSection(SolutionItems) = preProject
scripts\build.sh = scripts\build.sh
scripts\integration_test.py = scripts\integration_test.py
scripts\Dockerfile-build = scripts\Dockerfile-build
scripts\docker-compose.yaml = scripts\docker-compose.yaml
EndProjectSection
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Canon.Tests", "Canon.Tests\Canon.Tests.csproj", "{E5F2B97B-3766-466D-9309-BA361F0CE15E}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = ".gitea", ".gitea", "{AECBE745-8E56-49DE-B85E-CEF14DE65134}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "workflows", "workflows", "{EAA3629C-CD74-4E1F-A7F8-76D1FF0EC925}"
ProjectSection(SolutionItems) = preProject
.gitea\workflows\test.yaml = .gitea\workflows\test.yaml
.gitea\workflows\integration_test.yaml = .gitea\workflows\integration_test.yaml
.gitea\workflows\build.yaml = .gitea\workflows\build.yaml
EndProjectSection
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Canon.Server", "Canon.Server\Canon.Server.csproj", "{401112EA-1A87-4D1C-9B6D-085309F4137E}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Canon.Generator", "Canon.Generator\Canon.Generator.csproj", "{32C103C4-589C-4DC2-B173-55B1799B62CE}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Release|Any CPU = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{63EC6CDA-0BF2-4DC6-BEC1-5A3083130E89}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{63EC6CDA-0BF2-4DC6-BEC1-5A3083130E89}.Debug|Any CPU.Build.0 = Debug|Any CPU
{63EC6CDA-0BF2-4DC6-BEC1-5A3083130E89}.Release|Any CPU.ActiveCfg = Release|Any CPU
{63EC6CDA-0BF2-4DC6-BEC1-5A3083130E89}.Release|Any CPU.Build.0 = Release|Any CPU
{3D1C0BA2-57F2-41B2-B024-7A0E54A91DA0}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{3D1C0BA2-57F2-41B2-B024-7A0E54A91DA0}.Debug|Any CPU.Build.0 = Debug|Any CPU
{3D1C0BA2-57F2-41B2-B024-7A0E54A91DA0}.Release|Any CPU.ActiveCfg = Release|Any CPU
{3D1C0BA2-57F2-41B2-B024-7A0E54A91DA0}.Release|Any CPU.Build.0 = Release|Any CPU
{E5F2B97B-3766-466D-9309-BA361F0CE15E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{E5F2B97B-3766-466D-9309-BA361F0CE15E}.Debug|Any CPU.Build.0 = Debug|Any CPU
{E5F2B97B-3766-466D-9309-BA361F0CE15E}.Release|Any CPU.ActiveCfg = Release|Any CPU
{E5F2B97B-3766-466D-9309-BA361F0CE15E}.Release|Any CPU.Build.0 = Release|Any CPU
{401112EA-1A87-4D1C-9B6D-085309F4137E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{401112EA-1A87-4D1C-9B6D-085309F4137E}.Debug|Any CPU.Build.0 = Debug|Any CPU
{401112EA-1A87-4D1C-9B6D-085309F4137E}.Release|Any CPU.ActiveCfg = Release|Any CPU
{401112EA-1A87-4D1C-9B6D-085309F4137E}.Release|Any CPU.Build.0 = Release|Any CPU
{32C103C4-589C-4DC2-B173-55B1799B62CE}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{32C103C4-589C-4DC2-B173-55B1799B62CE}.Debug|Any CPU.Build.0 = Debug|Any CPU
{32C103C4-589C-4DC2-B173-55B1799B62CE}.Release|Any CPU.ActiveCfg = Release|Any CPU
{32C103C4-589C-4DC2-B173-55B1799B62CE}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(NestedProjects) = preSolution
{EAA3629C-CD74-4E1F-A7F8-76D1FF0EC925} = {AECBE745-8E56-49DE-B85E-CEF14DE65134}
EndGlobalSection
EndGlobal

23
Canon.slnx Normal file
View File

@ -0,0 +1,23 @@
<Solution>
<Folder Name="/.gitea/" />
<Folder Name="/.gitea/workflows/">
<File Path=".gitea/workflows/build.yaml" />
<File Path=".gitea/workflows/integration_test.yaml" />
<File Path=".gitea/workflows/test.yaml" />
</Folder>
<Folder Name="/scripts/">
<File Path="scripts/build.sh" />
<File Path="scripts/docker-compose.yaml" />
<File Path="scripts/Dockerfile-build" />
<File Path="scripts/integration_test.py" />
</Folder>
<Folder Name="/Solution Files/">
<File Path="LICENSE" />
<File Path="README.md" />
</Folder>
<Project Path="Canon.Console/Canon.Console.csproj" />
<Project Path="Canon.Core/Canon.Core.csproj" />
<Project Path="Canon.Generator/Canon.Generator.csproj" />
<Project Path="Canon.Server/Canon.Server.csproj" />
<Project Path="Canon.Tests/Canon.Tests.csproj" />
</Solution>

View File

@ -12,7 +12,7 @@
我们在此次课程设计中使用`dotnet`平台实现了一个名为`Canon`的Pascal-S编译器。 我们在此次课程设计中使用`dotnet`平台实现了一个名为`Canon`的Pascal-S编译器。
> Cano n中文名卡农意为“规律”亦是一种音乐作曲技巧。 > Canon中文名卡农意为“规律”亦是一种音乐作曲技巧。
编译器全部为自行编程实现,没有使用类似于`flex`和`bison`之类的前端辅助工具。词法分析使用自行实现的自动机算法,语法分析使用`LR(1)`文法,在项目中实现了一个简单的`LR(1)`分析器生成工具。语义分析和代码生成使用类似于语法制导翻译的技术,详细设计见课程设计说明。 编译器全部为自行编程实现,没有使用类似于`flex`和`bison`之类的前端辅助工具。词法分析使用自行实现的自动机算法,语法分析使用`LR(1)`文法,在项目中实现了一个简单的`LR(1)`分析器生成工具。语义分析和代码生成使用类似于语法制导翻译的技术,详细设计见课程设计说明。
@ -39,6 +39,51 @@ latexmk main.tex
项目中提供了一个公开测试集的自动测试工具,使用`python`编写,需要在系统中安装`fpc`编译器和`gcc`编译器。脚本会自动使用`fpc`编译器和自行实现的编译器`pascc`编译所有的公开测试集输入代码,并比对两个编译器输出文件的执行结果是否一致。使用方法可参见`CI`文件`.gitea/workflows/integration_test.yaml`。 项目中提供了一个公开测试集的自动测试工具,使用`python`编写,需要在系统中安装`fpc`编译器和`gcc`编译器。脚本会自动使用`fpc`编译器和自行实现的编译器`pascc`编译所有的公开测试集输入代码,并比对两个编译器输出文件的执行结果是否一致。使用方法可参见`CI`文件`.gitea/workflows/integration_test.yaml`。
### 构建
为了项目在头歌平台上运行,`Canon.Console`项目支持通过AOT编译构建出一个静态链接musl libc的可执行文件。使用此种构建方式除了需要安装.NET SDK,
还需要参考[交叉编译文档](https://learn.microsoft.com/zh-cn/dotnet/core/deploying/native-aot/cross-compile)安装相关的依赖,例如`clang`和`musl`。
使用下面指令进行编译:
```shell
cd Canon.Console
dotnet publish -r linux-musl-x64
```
编译好的可执行文件位于`bin/Release/net9.0/linux-musl-x64/publish`,可执行文件名为`Canon.Console`。
### 在线编译网站
为了获得更好的调试体验和更好的分数,我们提供了一个在线编译运行网站,在获得编译结果的同时可以绘制输入源代码的语法树。
![website-overview](./assets/website-overview.png)
![syntax-tree](./assets/syntax-tree.png)
该网站的后端位于`Canon.Server`中,该网站的前端位于`Canon.Server/client-app`中。
该网站的后端依赖`mongodb`数据库,使用`docker`可以方便地启动一个`mongodb`数据库实例:
```shell
docker run -d -p 27017:27017 mongo:7.0-jammy
```
启动网站后端:
```shell
cd Canon.Server
dotnet run
```
启动网站前端:
```shell
cd Canon.Server/client-app
pnpm install
pnpm run dev
```
## 支持 ## 支持
如果您在学习或者是抄袭的过程中发现了问题,我们十分欢迎您提出,您可以通过发起`issue`或者是发送电子邮件的方式联系我们。 如果您在学习或者是抄袭的过程中发现了问题,我们十分欢迎您提出,您可以通过发起`issue`或者是发送电子邮件的方式联系我们。

BIN
assets/syntax-tree.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 159 KiB

BIN
assets/website-overview.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 61 KiB