Skip to content

[Parser] Do not eagerly lex strings #6543

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 16 additions & 22 deletions src/parser/lexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1004,17 +1004,6 @@ std::optional<float> Token::getF32() const {
return {};
}

std::optional<std::string_view> Token::getString() const {
if (auto* tok = std::get_if<StringTok>(&data)) {
if (tok->str) {
return std::string_view(*tok->str);
}
// Remove quotes.
return span.substr(1, span.size() - 2);
}
return {};
}

void Lexer::skipSpace() {
while (true) {
if (auto ctx = annotation(next())) {
Expand Down Expand Up @@ -1054,6 +1043,22 @@ bool Lexer::takeRParen() {
return false;
}

// Tries to take a string at the current position. Returns `std::nullopt`
// without consuming anything when a token is already held in `curr` or when
// `str(next())` yields no result. On success, moves `index` past the string's
// span, calls `advance()`, and returns the string's contents.
std::optional<std::string> Lexer::takeString() {
  if (curr) {
    return std::nullopt;
  }
  auto lexed = str(next());
  if (!lexed) {
    return std::nullopt;
  }
  index += lexed->span.size();
  advance();
  if (!lexed->str) {
    // Remove quotes.
    const auto inner = lexed->span.substr(1, lexed->span.size() - 2);
    return std::string(inner);
  }
  // The lexer result carries its own contents; return those.
  return lexed->str;
}

std::optional<Name> Lexer::takeID() {
if (curr) {
return std::nullopt;
Expand Down Expand Up @@ -1132,8 +1137,6 @@ void Lexer::lexToken() {
tok = Token{t->span, IntTok{t->n, t->sign}};
} else if (auto t = float_(next())) {
tok = Token{t->span, FloatTok{t->nanPayload, t->d}};
} else if (auto t = str(next())) {
tok = Token{t->span, StringTok{t->str}};
} else {
// TODO: Do something about lexing errors.
curr = std::nullopt;
Expand Down Expand Up @@ -1204,15 +1207,6 @@ std::ostream& operator<<(std::ostream& os, const FloatTok& tok) {
return os << tok.d;
}

// Prints a `StringTok`: its stored contents wrapped in double quotes when
// present, otherwise the placeholder "(raw string)".
std::ostream& operator<<(std::ostream& os, const StringTok& tok) {
  if (!tok.str) {
    return os << "(raw string)";
  }
  return os << '"' << *tok.str << '"';
}

std::ostream& operator<<(std::ostream& os, const Token& tok) {
std::visit([&](const auto& t) { os << t; }, tok.data);
return os << " \"" << tok.span << "\"";
Expand Down
36 changes: 9 additions & 27 deletions src/parser/lexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,16 +68,8 @@ struct FloatTok {
friend std::ostream& operator<<(std::ostream&, const FloatTok&);
};

// Token data for a string literal.
struct StringTok {
  // Holds the string's contents when the string contains escapes; empty
  // otherwise.
  std::optional<std::string> str;

  friend std::ostream& operator<<(std::ostream&, const StringTok&);

  // Two string tokens are equal when their optional contents are equal.
  bool operator==(const StringTok& rhs) const { return str == rhs.str; }
};

struct Token {
using Data = std::variant<IntTok, FloatTok, StringTok>;
using Data = std::variant<IntTok, FloatTok>;
std::string_view span;
Data data;

Expand All @@ -90,7 +82,6 @@ struct Token {
template<typename T> std::optional<T> getI() const;
std::optional<double> getF64() const;
std::optional<float> getF32() const;
std::optional<std::string_view> getString() const;

bool operator==(const Token&) const;
friend std::ostream& operator<<(std::ostream& os, const Token&);
Expand Down Expand Up @@ -145,6 +136,10 @@ struct Lexer {
if (peekLParen() || peekRParen()) {
return true;
}
// Do not count the parentheses in strings.
if (takeString()) {
continue;
}
if (!curr) {
++index;
}
Expand Down Expand Up @@ -218,27 +213,14 @@ struct Lexer {
return std::nullopt;
}

// Takes the current token as a string. Returns an empty optional, consuming
// nothing, when there is no current token or `getString()` on it yields no
// value.
std::optional<std::string> takeString() {
  if (!curr) {
    return {};
  }
  auto contents = curr->getString();
  if (!contents) {
    return {};
  }
  // Copy the contents into an owned string before calling `advance()`.
  std::string ret(*contents);
  advance();
  return ret;
}
std::optional<std::string> takeString();

// Takes a string via `takeString()` and wraps it in a `Name`. Returns an empty
// optional when no string could be taken.
//
// NOTE(review): this span appears to interleave the pre- and post-change lines
// of a diff. As written, the second `return` in each consecutive pair below is
// unreachable, and the two TODO comments overlap. Confirm against the real
// file which variant is intended.
std::optional<Name> takeName() {
// TODO: Move this to lexer and validate UTF.
// TODO: Validate UTF.
if (auto str = takeString()) {
// Copy to a std::string to make sure we have a null terminator, otherwise
// the `Name` constructor won't work correctly.
// TODO: Update `Name` to use string_view instead of char* and/or to take
// rvalue strings to avoid this extra copy.
return Name(std::string(*str));
return Name(*str);
}
return {};
return std::nullopt;
}

bool takeSExprStart(std::string_view expected) {
Expand Down