fix: 区分字符和字符串 (#74)

Co-authored-by: Huaps <1183155719@qq.com>
Reviewed-on: PostGuard/Canon#74
This commit is contained in:
jackfiled 2024-05-04 13:57:14 +08:00
parent 8da24523c9
commit 6e8e3885ac
7 changed files with 74 additions and 30 deletions

View File

@ -8,6 +8,7 @@ public enum SemanticTokenType
Delimiter, Delimiter,
Identifier, Identifier,
Character, Character,
String,
Empty, Empty,
/// <summary> /// <summary>
/// 语法分析中的栈底符号 /// 语法分析中的栈底符号

View File

@ -16,6 +16,13 @@ public static class LexemeFactory
}; };
token = characterSemanticToken; token = characterSemanticToken;
break; break;
case SemanticTokenType.String:
StringSemanticToken stringSemanticToken = new()
{
LinePos = line, CharacterPos = chPos, LiteralValue = literal,
};
token = stringSemanticToken;
break;
case SemanticTokenType.Identifier: case SemanticTokenType.Identifier:
IdentifierSemanticToken identifierSemanticToken = new() IdentifierSemanticToken identifierSemanticToken = new()
{ {

View File

@ -80,6 +80,7 @@ public class Lexer : ILexer
} }
_tokens.Add(SemanticToken.End); _tokens.Add(SemanticToken.End);
return _tokens; return _tokens;
} }
@ -447,7 +448,6 @@ public class Lexer : ILexer
} }
break; break;
case '\'': case '\'':
case '\"':
{ {
// 重置_token准备收集字符串内容 // 重置_token准备收集字符串内容
ResetTokenBuilder(); ResetTokenBuilder();
@ -464,8 +464,18 @@ public class Lexer : ILexer
} }
} }
string currentString = GetCurrentTokenString();
if (currentString.Length > 1)
{
_semanticToken = LexemeFactory.MakeToken(SemanticTokenType.String,
currentString, _line, _chPos);
}
else
{
_semanticToken = LexemeFactory.MakeToken(SemanticTokenType.Character, _semanticToken = LexemeFactory.MakeToken(SemanticTokenType.Character,
GetCurrentTokenString(), _line, _chPos); currentString, _line, _chPos);
}
ResetTokenBuilder(); ResetTokenBuilder();

View File

@ -104,6 +104,32 @@ public abstract class SemanticToken : IEquatable<SemanticToken>
public class CharacterSemanticToken : SemanticToken public class CharacterSemanticToken : SemanticToken
{ {
public override SemanticTokenType TokenType => SemanticTokenType.Character; public override SemanticTokenType TokenType => SemanticTokenType.Character;
/// <summary>
/// Converts this token's literal text into the single character it represents.
/// </summary>
/// <returns>The character parsed from <c>LiteralValue</c>.</returns>
/// <exception cref="System.FormatException">
/// Thrown by <see cref="char.Parse(string)"/> when <c>LiteralValue</c> is not exactly one character long.
/// </exception>
public char ParseAsCharacter() => char.Parse(LiteralValue);
}
/// <summary>
/// 字符串类型记号
/// </summary>
public class StringSemanticToken : SemanticToken
{
public override SemanticTokenType TokenType => SemanticTokenType.String;
/// <summary>
/// Returns the string this token represents.
/// </summary>
/// <returns>The token's <c>LiteralValue</c>, returned unchanged.</returns>
public string ParseAsString() => LiteralValue;
} }
/// <summary> /// <summary>

View File

@ -39,7 +39,7 @@ public class PascalGrammarTests
{ {
const string program = """ const string program = """
program exFunction; program exFunction;
const str = 'result is : '; const str = 'a';
var a, b : Integer; var a, b : Integer;
begin begin
writeln( str, ret ); writeln( str, ret );

View File

@ -7,33 +7,33 @@ using Canon.Tests.Utils;
namespace Canon.Tests.LexicalParserTests namespace Canon.Tests.LexicalParserTests
{ {
public class CharacterTypeTests public class CharacterTypeTests(ITestOutputHelper testOutputHelper)
{ {
private readonly ITestOutputHelper _testOutputHelper;
private readonly ILexer _lexer = new Lexer(); private readonly ILexer _lexer = new Lexer();
public CharacterTypeTests(ITestOutputHelper testOutputHelper)
{
_testOutputHelper = testOutputHelper;
}
[Theory] [Theory]
[InlineData("'a'", "a")] [InlineData("'a'", 'a')]
[InlineData("'Hello, World!'", "Hello, World!")] [InlineData("'+'", '+')]
public void TestCharacterType(string input, char expectedResult)
public void TestCharacterType(string input, string? expectedResult)
{ {
IEnumerable<SemanticToken> tokensEnumerable = _lexer.Tokenize(new StringSourceReader(input)); IEnumerable<SemanticToken> tokensEnumerable = _lexer.Tokenize(new StringSourceReader(input));
List<SemanticToken> tokens = tokensEnumerable.ToList(); List<SemanticToken> tokens = tokensEnumerable.ToList();
if (expectedResult == null)
{ testOutputHelper.WriteLine(tokens[0].LiteralValue);
Assert.Throws<LexemeException>(() => tokens);
}
else
{
_testOutputHelper.WriteLine(tokens[0].LiteralValue);
Assert.Equal(SemanticTokenType.Character, tokens[0].TokenType); Assert.Equal(SemanticTokenType.Character, tokens[0].TokenType);
Assert.Equal(expectedResult, tokens[0].LiteralValue); Assert.Equal(expectedResult, tokens[0].Convert<CharacterSemanticToken>().ParseAsCharacter());
} }
[Theory]
[InlineData("'Hello, world'", "Hello, world")]
[InlineData("'asdfasdf'", "asdfasdf")]
public void StringTypeTest(string input, string expect)
{
IEnumerable<SemanticToken> tokens = _lexer.Tokenize(new StringSourceReader(input));
SemanticToken token = tokens.First();
Assert.Equal(SemanticTokenType.String, token.TokenType);
Assert.Equal(expect, token.Convert<StringSemanticToken>().ParseAsString());
} }
[Theory] [Theory]
@ -42,11 +42,11 @@ namespace Canon.Tests.LexicalParserTests
[InlineData("'This", 1, 5, LexemeErrorType.UnclosedStringLiteral)] [InlineData("'This", 1, 5, LexemeErrorType.UnclosedStringLiteral)]
[InlineData("x @", 1, 3, LexemeErrorType.UnknownCharacterOrString)] [InlineData("x @", 1, 3, LexemeErrorType.UnknownCharacterOrString)]
//[InlineData("\"x\'", 1, 3, LexemeException.LexemeErrorType.UnclosedStringLiteral)] //[InlineData("\"x\'", 1, 3, LexemeException.LexemeErrorType.UnclosedStringLiteral)]
public void TestParseCharacterError(string input, uint expectedLine, uint expectedCharPosition, LexemeErrorType expectedErrorType) public void TestParseCharacterError(string input, uint expectedLine, uint expectedCharPosition,
LexemeErrorType expectedErrorType)
{ {
var ex = Assert.Throws<LexemeException>(() => _lexer.Tokenize(new StringSourceReader(input)).ToList()); var ex = Assert.Throws<LexemeException>(() => _lexer.Tokenize(new StringSourceReader(input)).ToList());
_testOutputHelper.WriteLine(ex.ToString()); testOutputHelper.WriteLine(ex.ToString());
Assert.Equal(expectedErrorType, ex.ErrorType); Assert.Equal(expectedErrorType, ex.ErrorType);
Assert.Equal(expectedLine, ex.Line); Assert.Equal(expectedLine, ex.Line);
Assert.Equal(expectedCharPosition, ex.CharPosition); Assert.Equal(expectedCharPosition, ex.CharPosition);

View File

@ -17,9 +17,9 @@ public class LexicalFileTests(ITestOutputHelper testOutputHelper)
string pascalProgram = """ string pascalProgram = """
program HelloWorld; program HelloWorld;
var var
message: string; message: char;
begin begin
message := 'hello, world!'; message := 'h';
writeln(message); writeln(message);
end. end.
"""; """;
@ -32,7 +32,7 @@ public class LexicalFileTests(ITestOutputHelper testOutputHelper)
SemanticTokenType.Keyword, SemanticTokenType.Keyword,
SemanticTokenType.Identifier, SemanticTokenType.Identifier,
SemanticTokenType.Delimiter, SemanticTokenType.Delimiter,
SemanticTokenType.Identifier, SemanticTokenType.Keyword,
SemanticTokenType.Delimiter, SemanticTokenType.Delimiter,
SemanticTokenType.Keyword, SemanticTokenType.Keyword,
SemanticTokenType.Identifier, SemanticTokenType.Identifier,