Skip to content

Commit

Permalink
feat(parser): allow doc-comments anywhere (#154)
Browse files Browse the repository at this point in the history
* feat(parser): allow doc-comments anywhere

Rewrites how the parser handles comments/docs to allow doc-comments in any
place in source code, like comments.

* add fn comment

* perf: compact next_token

* perf: keep skipping comments

* Revert "perf: keep skipping comments"

This reverts commit fe93542.

* perf: unwrap EOF

* Revert "perf: unwrap EOF"

This reverts commit 6095a53.
  • Loading branch information
DaniPopes authored Dec 5, 2024
1 parent eb0c51a commit bfe2320
Show file tree
Hide file tree
Showing 11 changed files with 124 additions and 89 deletions.
2 changes: 1 addition & 1 deletion crates/ast/src/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ impl std::ops::Deref for Arena {
pub type DocComments<'ast> = Box<'ast, [DocComment]>;

/// A single doc-comment: `/// foo`, `/** bar */`.
#[derive(Clone, Debug)]
#[derive(Clone, Copy, Debug)]
pub struct DocComment {
/// The comment kind.
pub kind: CommentKind,
Expand Down
41 changes: 40 additions & 1 deletion crates/ast/src/token.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
//! Solidity source code token.
use crate::ast::{BinOp, BinOpKind, UnOp, UnOpKind};
use crate::{
ast::{BinOp, BinOpKind, UnOp, UnOpKind},
DocComment,
};
use solar_interface::{diagnostics::ErrorGuaranteed, Ident, Span, Symbol};
use std::{borrow::Cow, fmt};

Expand Down Expand Up @@ -402,6 +405,12 @@ impl TokenKind {
matches!(self, Self::Comment(false, ..))
}

/// Returns `true` if the token kind is a comment or doc-comment.
#[inline]
pub const fn is_comment_or_doc(&self) -> bool {
matches!(self, Self::Comment(..))
}

/// Glues two token kinds together.
pub const fn glue(&self, other: &Self) -> Option<Self> {
use BinOpToken::*;
Expand Down Expand Up @@ -519,6 +528,30 @@ impl Token {
}
}

/// Returns the comment if the kind is [`TokenKind::Comment`], and whether it's a doc-comment.
#[inline]
pub const fn comment(&self) -> Option<(bool, DocComment)> {
match self.kind {
TokenKind::Comment(is_doc, kind, symbol) => {
Some((is_doc, DocComment { span: self.span, kind, symbol }))
}
_ => None,
}
}

/// Returns the comment if the kind is [`TokenKind::Comment`].
///
/// Does not check that `is_doc` is `true`.
#[inline]
pub const fn doc(&self) -> Option<DocComment> {
match self.kind {
TokenKind::Comment(_, kind, symbol) => {
Some(DocComment { span: self.span, kind, symbol })
}
_ => None,
}
}

/// Returns `true` if the token is an operator.
#[inline]
pub const fn is_op(&self) -> bool {
Expand Down Expand Up @@ -659,6 +692,12 @@ impl Token {
self.kind.is_comment()
}

/// Returns `true` if the token is a comment or doc-comment.
#[inline]
pub const fn is_comment_or_doc(&self) -> bool {
self.kind.is_comment_or_doc()
}

/// Returns `true` if the token is a location specifier.
#[inline]
pub fn is_location_specifier(&self) -> bool {
Expand Down
3 changes: 0 additions & 3 deletions crates/parse/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,9 +94,6 @@ impl<'sess, 'src> Lexer<'sess, 'src> {
if token.is_eof() {
break;
}
if token.is_comment() {
continue;
}
tokens.push(token);
}
trace!(
Expand Down
3 changes: 1 addition & 2 deletions crates/parse/src/parser/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ impl<'sess, 'ast> Parser<'sess, 'ast> {
&mut self,
with: Option<Box<'ast, Expr<'ast>>>,
) -> PResult<'sess, Box<'ast, Expr<'ast>>> {
self.ignore_doc_comments();
let expr = self.parse_binary_expr(4, with)?;
if self.eat(&TokenKind::Question) {
let then = self.parse_expr()?;
Expand Down Expand Up @@ -198,7 +197,7 @@ impl<'sess, 'ast> Parser<'sess, 'ast> {
// Array or tuple expression.
let TokenKind::OpenDelim(close_delim) = self.token.kind else { unreachable!() };
let is_array = close_delim == Delimiter::Bracket;
let list = self.parse_optional_items_seq(close_delim, |this| this.parse_expr())?;
let list = self.parse_optional_items_seq(close_delim, Self::parse_expr)?;
if is_array {
if !list.iter().all(Option::is_some) {
let msg = "array expression components cannot be empty";
Expand Down
12 changes: 3 additions & 9 deletions crates/parse/src/parser/item.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ impl<'sess, 'ast> Parser<'sess, 'ast> {
/// Parses an item.
#[instrument(level = "debug", skip_all)]
pub fn parse_item(&mut self) -> PResult<'sess, Option<Item<'ast>>> {
let docs = self.parse_doc_comments()?;
let docs = self.parse_doc_comments();
self.parse_spanned(Self::parse_item_kind)
.map(|(span, kind)| kind.map(|kind| Item { docs, span, kind }))
}
Expand Down Expand Up @@ -329,10 +329,7 @@ impl<'sess, 'ast> Parser<'sess, 'ast> {
/// Parses an enum definition.
fn parse_enum(&mut self) -> PResult<'sess, ItemEnum<'ast>> {
let name = self.parse_ident()?;
let variants = self.parse_delim_comma_seq(Delimiter::Brace, true, |this| {
this.ignore_doc_comments();
this.parse_ident()
})?;
let variants = self.parse_delim_comma_seq(Delimiter::Brace, true, Self::parse_ident)?;
Ok(ItemEnum { name, variants })
}

Expand Down Expand Up @@ -633,10 +630,7 @@ impl<'sess, 'ast> Parser<'sess, 'ast> {
lo = lo.with_lo(ty.span.lo());
ty
}
None => {
self.ignore_doc_comments();
self.parse_type()?
}
None => self.parse_type()?,
};

if ty.is_function()
Expand Down
109 changes: 53 additions & 56 deletions crates/parse/src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,16 +35,18 @@ pub struct Parser<'sess, 'ast> {
expected_tokens: Vec<ExpectedToken>,
/// The span of the last unexpected token.
last_unexpected_token_span: Option<Span>,
/// The current doc-comments.
docs: Vec<DocComment>,

/// The token stream.
tokens: std::vec::IntoIter<Token>,

/// Whether the parser is in Yul mode.
///
/// Currently, this can only happen when parsing a Yul "assembly" block.
in_yul: bool,
/// Whether the parser is currently parsing a contract block.
in_contract: bool,

/// The token stream.
tokens: std::vec::IntoIter<Token>,
}

#[derive(Clone, Debug, PartialEq, Eq)]
Expand Down Expand Up @@ -117,25 +119,18 @@ impl SeqSep {

impl<'sess, 'ast> Parser<'sess, 'ast> {
/// Creates a new parser.
///
/// # Panics
///
/// Panics if any of the tokens are comments.
pub fn new(sess: &'sess Session, arena: &'ast ast::Arena, tokens: Vec<Token>) -> Self {
debug_assert!(
tokens.iter().all(|t| !t.is_comment()),
"comments should be stripped before parsing"
);
let mut parser = Self {
sess,
arena,
token: Token::DUMMY,
prev_token: Token::DUMMY,
expected_tokens: Vec::with_capacity(8),
last_unexpected_token_span: None,
docs: Vec::with_capacity(4),
tokens: tokens.into_iter(),
in_yul: false,
in_contract: false,
tokens: tokens.into_iter(),
};
parser.bump();
parser
Expand Down Expand Up @@ -710,30 +705,61 @@ impl<'sess, 'ast> Parser<'sess, 'ast> {

/// Advance the parser by one token.
pub fn bump(&mut self) {
let mut next = self.tokens.next().unwrap_or(Token::EOF);
if next.span.is_dummy() {
// Tweak the location for better diagnostics.
next.span = self.token.span;
let next = self.next_token();
if next.is_comment_or_doc() {
return self.bump_trivia(next);
}
self.inlined_bump_with(next);
}

/// Advance the parser by one token using provided token as the next one.
///
/// # Panics
///
/// Panics if the provided token is a comment.
pub fn bump_with(&mut self, next: Token) {
self.inlined_bump_with(next);
}

/// This always-inlined version should only be used on hot code paths.
#[inline(always)]
fn inlined_bump_with(&mut self, next_token: Token) {
fn inlined_bump_with(&mut self, next: Token) {
#[cfg(debug_assertions)]
if next_token.is_comment() {
self.comment_in_stream(next_token.span);
if next.is_comment_or_doc() {
self.dcx().bug("`bump_with` should not be used with comments").span(next.span).emit();
}
self.prev_token = std::mem::replace(&mut self.token, next);
self.expected_tokens.clear();
}

/// Bumps comments and docs.
///
/// Pushes docs to `self.docs`. Retrieve them with `parse_doc_comments`.
#[cold]
fn bump_trivia(&mut self, next: Token) {
self.docs.clear();

debug_assert!(next.is_comment_or_doc());
self.prev_token = std::mem::replace(&mut self.token, next);
while let Some((is_doc, doc)) = self.token.comment() {
if is_doc {
self.docs.push(doc);
}
// Don't set `prev_token` on purpose.
self.token = self.next_token();
}
self.prev_token = std::mem::replace(&mut self.token, next_token);

self.expected_tokens.clear();
}

/// Advances the internal `tokens` iterator, without updating the parser state.
///
/// Use [`bump`](Self::bump) and [`token`](Self::token) instead.
#[inline(always)]
fn next_token(&mut self) -> Token {
self.tokens.next().unwrap_or(Token { kind: TokenKind::Eof, span: self.token.span })
}

/// Returns the token `dist` tokens ahead of the current one.
///
/// [`Eof`](Token::EOF) will be returned if the look-ahead is any distance past the end of the
Expand Down Expand Up @@ -790,50 +816,21 @@ impl<'sess, 'ast> Parser<'sess, 'ast> {
}
}

/// Parses and ignores contiguous doc comments.
#[inline]
pub fn ignore_doc_comments(&mut self) {
if matches!(self.token.kind, TokenKind::Comment(..)) {
self.ignore_doc_comments_inner();
}
}

#[cold]
fn ignore_doc_comments_inner(&mut self) {
while let Token { span, kind: TokenKind::Comment(is_doc, _kind, _symbol) } = self.token {
if !is_doc {
self.comment_in_stream(span);
}
self.bump();
}
}

/// Parses contiguous doc comments. Can be empty.
#[inline]
pub fn parse_doc_comments(&mut self) -> PResult<'sess, DocComments<'ast>> {
if matches!(self.token.kind, TokenKind::Comment(..)) {
pub fn parse_doc_comments(&mut self) -> DocComments<'ast> {
if !self.docs.is_empty() {
self.parse_doc_comments_inner()
} else {
Ok(Default::default())
}
}

#[cold]
fn parse_doc_comments_inner(&mut self) -> PResult<'sess, DocComments<'ast>> {
let mut doc_comments = SmallVec::<[_; 4]>::new();
while let Token { span, kind: TokenKind::Comment(is_doc, kind, symbol) } = self.token {
if !is_doc {
self.comment_in_stream(span);
}
doc_comments.push(DocComment { kind, span, symbol });
self.bump();
Default::default()
}
Ok(self.alloc_smallvec(doc_comments))
}

#[cold]
fn comment_in_stream(&self, span: Span) -> ! {
self.dcx().bug("comments should not be in the token stream").span(span).emit()
fn parse_doc_comments_inner(&mut self) -> DocComments<'ast> {
let docs = self.arena.alloc_slice_copy(&self.docs);
self.docs.clear();
docs
}

/// Parses a qualified identifier: `foo.bar.baz`.
Expand Down
4 changes: 2 additions & 2 deletions crates/parse/src/parser/stmt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ impl<'sess, 'ast> Parser<'sess, 'ast> {
/// Parses a statement.
#[instrument(level = "debug", skip_all)]
pub fn parse_stmt(&mut self) -> PResult<'sess, Stmt<'ast>> {
let docs = self.parse_doc_comments()?;
let docs = self.parse_doc_comments();
self.parse_spanned(Self::parse_stmt_kind).map(|(span, kind)| Stmt { docs, kind, span })
}

Expand Down Expand Up @@ -172,7 +172,7 @@ impl<'sess, 'ast> Parser<'sess, 'ast> {

/// Parses a simple statement. These are just variable declarations and expressions.
fn parse_simple_stmt(&mut self) -> PResult<'sess, Stmt<'ast>> {
let docs = self.parse_doc_comments()?;
let docs = self.parse_doc_comments();
self.parse_spanned(Self::parse_simple_stmt_kind).map(|(span, kind)| Stmt {
docs,
kind,
Expand Down
7 changes: 3 additions & 4 deletions crates/parse/src/parser/yul.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ impl<'sess, 'ast> Parser<'sess, 'ast> {
/// See: <https://github.com/ethereum/solidity/blob/eff410eb746f202fe756a2473fd0c8a718348457/libyul/ObjectParser.cpp#L50>
#[instrument(level = "debug", skip_all)]
pub fn parse_yul_file_object(&mut self) -> PResult<'sess, Object<'ast>> {
let docs = self.parse_doc_comments()?;
let docs = self.parse_doc_comments();
let object = if self.check_keyword(sym::object) {
self.parse_yul_object(docs)
} else {
Expand Down Expand Up @@ -40,7 +40,7 @@ impl<'sess, 'ast> Parser<'sess, 'ast> {
let mut children = Vec::new();
let mut data = Vec::new();
loop {
let docs = self.parse_doc_comments()?;
let docs = self.parse_doc_comments();
if self.check_keyword(sym::object) {
children.push(self.parse_yul_object(docs)?);
} else if self.check_keyword(sym::data) {
Expand Down Expand Up @@ -87,7 +87,7 @@ impl<'sess, 'ast> Parser<'sess, 'ast> {

/// Parses a Yul statement, without setting `in_yul`.
pub fn parse_yul_stmt_unchecked(&mut self) -> PResult<'sess, Stmt<'ast>> {
let docs = self.parse_doc_comments()?;
let docs = self.parse_doc_comments();
self.parse_spanned(Self::parse_yul_stmt_kind).map(|(span, kind)| Stmt { docs, span, kind })
}

Expand Down Expand Up @@ -234,7 +234,6 @@ impl<'sess, 'ast> Parser<'sess, 'ast> {

/// Parses a Yul expression.
fn parse_yul_expr(&mut self) -> PResult<'sess, Expr<'ast>> {
self.ignore_doc_comments();
self.parse_spanned(Self::parse_yul_expr_kind).map(|(span, kind)| Expr { span, kind })
}

Expand Down
17 changes: 16 additions & 1 deletion tests/ui/parser/unusual_doc_comments.sol
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@ function f(
) returns (
/// @dev ret
uint y
) {
)
/// @dev why
{
/// @dev statement
SingleVaultSFData memory data = SingleVaultSFData(
/// a
Expand All @@ -25,4 +27,17 @@ function f(
SuperformRouter(payable(getContract(SOURCE_CHAIN, "SuperformRouter"))).singleXChainSingleVaultDeposit{
value: 2 ether
}(req);
/// @dev ????
}

/// @dev contract
contract C {
address[] public PROTOCOL_ADMINS = [
0xd26b38a64C812403fD3F87717624C80852cD6D61,
/// @dev ETH https://app.onchainden.com/safes/eth:0xd26b38a64c812403fd3f87717624c80852cd6d61
0xf70A19b67ACC4169cA6136728016E04931D550ae
/// @dev what the hell
]
/// @dev sure
;
}
Loading

0 comments on commit bfe2320

Please sign in to comment.