Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Backport 5.2 changes to parser-standard #2512

Merged
merged 7 commits into from
Oct 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion vendor/parser-shims/ocamlformat_parser_shims.mli
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ module Misc : sig
end

module Style : sig
val as_inline_code: (Format.formatter -> 'a -> unit as 'printer) -> 'printer
val as_inline_code: (Format.formatter -> 'a -> unit as 'printer) -> 'printer
(** @since ocaml-5.2 *)

val inline_code: Format.formatter -> string -> unit
Expand Down
12 changes: 8 additions & 4 deletions vendor/parser-standard/ast_helper.ml
Original file line number Diff line number Diff line change
Expand Up @@ -103,9 +103,9 @@ module Typ = struct
Ptyp_object (List.map loop_object_field lst, o)
| Ptyp_class (longident, lst) ->
Ptyp_class (longident, List.map loop lst)
| Ptyp_alias(core_type, string) ->
check_variable var_names t.ptyp_loc string;
Ptyp_alias(loop core_type, string)
| Ptyp_alias(core_type, alias) ->
check_variable var_names alias.loc alias.txt;
Ptyp_alias(loop core_type, alias)
| Ptyp_variant(row_field_list, flag, lbl_lst_option) ->
Ptyp_variant(List.map loop_row_field row_field_list,
flag, lbl_lst_option)
Expand Down Expand Up @@ -216,7 +216,9 @@ module Exp = struct
mk ?loc ?attrs (Pexp_letop {let_; ands; body})
let extension ?loc ?attrs a = mk ?loc ?attrs (Pexp_extension a)
let unreachable ?loc ?attrs () = mk ?loc ?attrs Pexp_unreachable
let hole ?loc ?attrs () = mk ?loc ?attrs Pexp_hole
(* Added *)
let hole ?loc ?attrs () = mk ?loc ?attrs Pexp_hole
(* *)

let case lhs ?guard rhs =
{
Expand Down Expand Up @@ -262,7 +264,9 @@ module Mod = struct
let constraint_ ?loc ?attrs m mty = mk ?loc ?attrs (Pmod_constraint (m, mty))
let unpack ?loc ?attrs e = mk ?loc ?attrs (Pmod_unpack e)
let extension ?loc ?attrs a = mk ?loc ?attrs (Pmod_extension a)
(* Added *)
let hole ?loc ?attrs () = mk ?loc ?attrs Pmod_hole
(* *)
end

module Sig = struct
Expand Down
17 changes: 13 additions & 4 deletions vendor/parser-standard/ast_mapper.ml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@
(* Ensure that record patterns don't miss any field. *)
*)

[@@@ocaml.warning "-60"] module Str = Ast_helper.Str (* For ocamldep *)
[@@@ocaml.warning "+60"]

open Parsetree
open Ast_helper
open Location
Expand All @@ -45,6 +48,7 @@ type mapper = {
constant: mapper -> constant -> constant;
constructor_declaration: mapper -> constructor_declaration
-> constructor_declaration;
directive_argument: mapper -> directive_argument -> directive_argument;
expr: mapper -> expression -> expression;
extension: mapper -> extension -> extension;
extension_constructor: mapper -> extension_constructor
Expand All @@ -68,6 +72,8 @@ type mapper = {
signature_item: mapper -> signature_item -> signature_item;
structure: mapper -> structure -> structure;
structure_item: mapper -> structure_item -> structure_item;
toplevel_directive: mapper -> toplevel_directive -> toplevel_directive;
toplevel_phrase: mapper -> toplevel_phrase -> toplevel_phrase;
typ: mapper -> core_type -> core_type;
type_declaration: mapper -> type_declaration -> type_declaration;
type_extension: mapper -> type_extension -> type_extension;
Expand All @@ -76,9 +82,6 @@ type mapper = {
value_binding: mapper -> value_binding -> value_binding;
value_description: mapper -> value_description -> value_description;
with_constraint: mapper -> with_constraint -> with_constraint;
directive_argument: mapper -> directive_argument -> directive_argument;
toplevel_directive: mapper -> toplevel_directive -> toplevel_directive;
toplevel_phrase: mapper -> toplevel_phrase -> toplevel_phrase;
}

let map_fst f (x, y) = (f x, y)
Expand Down Expand Up @@ -147,7 +150,9 @@ module T = struct
object_ ~loc ~attrs (List.map (object_field sub) l) o
| Ptyp_class (lid, tl) ->
class_ ~loc ~attrs (map_loc sub lid) (List.map (sub.typ sub) tl)
| Ptyp_alias (t, s) -> alias ~loc ~attrs (sub.typ sub t) s
| Ptyp_alias (t, s) ->
let s = map_loc sub s in
alias ~loc ~attrs (sub.typ sub t) s
| Ptyp_variant (rl, b, ll) ->
variant ~loc ~attrs (List.map (row_field sub) rl) b ll
| Ptyp_poly (sl, t) -> poly ~loc ~attrs
Expand Down Expand Up @@ -362,7 +367,9 @@ module M = struct
(sub.module_type sub mty)
| Pmod_unpack e -> unpack ~loc ~attrs (sub.expr sub e)
| Pmod_extension x -> extension ~loc ~attrs (sub.extension sub x)
(* Added *)
| Pmod_hole -> hole ~loc ~attrs ()
(* *)

let map_structure_item sub {pstr_loc = loc; pstr_desc = desc} =
let open Str in
Expand Down Expand Up @@ -471,7 +478,9 @@ module E = struct
(List.map (sub.binding_op sub) ands) (sub.expr sub body)
| Pexp_extension x -> extension ~loc ~attrs (sub.extension sub x)
| Pexp_unreachable -> unreachable ~loc ~attrs ()
(* Added *)
| Pexp_hole -> hole ~loc ~attrs ()
(* *)

let map_binding_op sub {pbop_op; pbop_pat; pbop_exp; pbop_loc} =
let open Exp in
Expand Down
68 changes: 53 additions & 15 deletions vendor/parser-standard/lexer.mll
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,34 @@ let get_stored_string () = Buffer.contents string_buffer
let store_string_char c = Buffer.add_char string_buffer c
let store_string_utf_8_uchar u = Buffer.add_utf_8_uchar string_buffer u
let store_string s = Buffer.add_string string_buffer s
let store_substring s ~pos ~len = Buffer.add_substring string_buffer s pos len

let store_lexeme lexbuf = store_string (Lexing.lexeme lexbuf)
let store_normalized_newline newline =
(* #12502: we normalize "\r\n" to "\n" at lexing time,
to avoid behavior difference due to OS-specific
newline characters in string literals.

(For example, Git for Windows will translate \n in versioned
files into \r\n sequences when checking out files on Windows. If
your code contains multiline quoted string literals, the raw
content of the string literal would be different between Git for
Windows users and all other users. Thanks to newline
normalization, the value of the literal as a string constant will
be the same no matter which programming tools are used.)

Many programming languages use the same approach, for example
Java, Javascript, Kotlin, Python, Swift and C++.
*)
(* Our 'newline' regexp accepts \r*\n, but we only wish
to normalize \r?\n into \n -- see the discussion in #12502.
All carriage returns except for the (optional) last one
are reproduced in the output. We implement this by skipping
the first carriage return, if any. *)
let len = String.length newline in
if len = 1
then store_string_char '\n'
else store_substring newline ~pos:1 ~len:(len - 1)

(* To store the position of the beginning of a string and comment *)
let string_start_loc = ref Location.none
Expand Down Expand Up @@ -338,7 +365,7 @@ let prepare_error loc = function
Location.error ~loc ~sub msg
| Keyword_as_label kwd ->
Location.errorf ~loc
"`%s' is a keyword, it cannot be used as label name" kwd
"%a is a keyword, it cannot be used as label name" Style.inline_code kwd
| Invalid_literal s ->
Location.errorf ~loc "Invalid literal %s" s
| Invalid_directive (dir, explanation) ->
Expand Down Expand Up @@ -403,6 +430,7 @@ let hex_float_literal =
('.' ['0'-'9' 'A'-'F' 'a'-'f' '_']* )?
(['p' 'P'] ['+' '-']? ['0'-'9'] ['0'-'9' '_']* )?
let literal_modifier = ['G'-'Z' 'g'-'z']
let raw_ident_escape = "\\#"

rule token = parse
| ('\\' as bs) newline {
Expand All @@ -421,6 +449,8 @@ rule token = parse
| ".~"
{ error lexbuf
(Reserved_sequence (".~", Some "is reserved for use in MetaOCaml")) }
| "~" raw_ident_escape (lowercase identchar * as name) ':'
{ LABEL name }
| "~" (lowercase identchar * as name) ':'
{ check_label_name lexbuf name;
LABEL name }
Expand All @@ -429,12 +459,16 @@ rule token = parse
LABEL name }
| "?"
{ QUESTION }
| "?" raw_ident_escape (lowercase identchar * as name) ':'
{ OPTLABEL name }
| "?" (lowercase identchar * as name) ':'
{ check_label_name lexbuf name;
OPTLABEL name }
| "?" (lowercase_latin1 identchar_latin1 * as name) ':'
{ warn_latin1 lexbuf;
OPTLABEL name }
| raw_ident_escape (lowercase identchar * as name)
{ LIDENT name }
| lowercase identchar * as name
{ try Hashtbl.find keyword_table name
with Not_found -> LIDENT name }
Expand Down Expand Up @@ -493,7 +527,7 @@ rule token = parse
{ CHAR(char_for_octal_code lexbuf 3) }
| "\'\\" 'x' ['0'-'9' 'a'-'f' 'A'-'F'] ['0'-'9' 'a'-'f' 'A'-'F'] "\'"
{ CHAR(char_for_hexadecimal_code lexbuf 3) }
| "\'" ("\\" _ as esc)
| "\'" ("\\" [^ '#'] as esc)
{ error lexbuf (Illegal_escape (esc, None)) }
| "\'\'"
{ error lexbuf Empty_character_literal }
Expand Down Expand Up @@ -676,9 +710,11 @@ and comment = parse
comment lexbuf }
| "\'\'"
{ store_lexeme lexbuf; comment lexbuf }
| "\'" newline "\'"
| "\'" (newline as nl) "\'"
{ update_loc lexbuf None 1 false 1;
store_lexeme lexbuf;
store_string_char '\'';
store_normalized_newline nl;
store_string_char '\'';
comment lexbuf
}
| "\'" [^ '\\' '\'' '\010' '\013' ] "\'"
Expand All @@ -699,9 +735,9 @@ and comment = parse
comment_start_loc := [];
error_loc loc (Unterminated_comment start)
}
| newline
| newline as nl
{ update_loc lexbuf None 1 false 0;
store_lexeme lexbuf;
store_normalized_newline nl;
comment lexbuf
}
| ident
Expand All @@ -712,9 +748,13 @@ and comment = parse
and string = parse
'\"'
{ lexbuf.lex_start_p }
| '\\' newline ([' ' '\t'] * as space)
| '\\' (newline as nl) ([' ' '\t'] * as space)
{ update_loc lexbuf None 1 false (String.length space);
if in_comment () then store_lexeme lexbuf;
if in_comment () then begin
store_string_char '\\';
store_normalized_newline nl;
store_string space;
end;
string lexbuf
}
| '\\' (['\\' '\'' '\"' 'n' 't' 'b' 'r' ' '] as c)
Expand Down Expand Up @@ -743,11 +783,9 @@ and string = parse
store_lexeme lexbuf;
string lexbuf
}
| newline
{ if not (in_comment ()) then
Location.prerr_warning (Location.curr lexbuf) Warnings.Eol_in_string;
update_loc lexbuf None 1 false 0;
store_lexeme lexbuf;
| newline as nl
{ update_loc lexbuf None 1 false 0;
store_normalized_newline nl;
string lexbuf
}
| eof
Expand All @@ -758,9 +796,9 @@ and string = parse
string lexbuf }

and quoted_string delim = parse
| newline
| newline as nl
{ update_loc lexbuf None 1 false 0;
store_lexeme lexbuf;
store_normalized_newline nl;
quoted_string delim lexbuf
}
| eof
Expand Down
52 changes: 43 additions & 9 deletions vendor/parser-standard/parser.mly
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@

%{

[@@@ocaml.warning "-60"] module Str = Ast_helper.Str (* For ocamldep *)
[@@@ocaml.warning "+60"]

open Asttypes
open Longident
open Parsetree
Expand Down Expand Up @@ -164,6 +167,10 @@ let mkuplus ~oploc name arg =
| _ ->
Pexp_apply(mkoperator ~loc:oploc ("~" ^ name), [Nolabel, arg])

let mk_attr ~loc name payload =
Builtin_attributes.(register_attr Parser name);
Attr.mk ~loc name payload

(* TODO define an abstraction boundary between locations-as-pairs
and locations-as-Location.t; it should be clear when we move from
one world to the other *)
Expand Down Expand Up @@ -1000,6 +1007,27 @@ reversed_nonempty_llist(X):
xs = rev(reversed_nonempty_llist(X))
{ xs }

(* [reversed_nonempty_concat(X)] recognizes a nonempty sequence of [X]s (each of
which is a list), and produces an OCaml list of their concatenation in
reverse order -- that is, the last element of the last list in the input text
appears first in the list.
*)
reversed_nonempty_concat(X):
x = X
{ List.rev x }
| xs = reversed_nonempty_concat(X) x = X
{ List.rev_append x xs }

(* [nonempty_concat(X)] recognizes a nonempty sequence of [X]s
(each of which is a list), and produces an OCaml list of their concatenation
in direct order -- that is, the first element of the first list in the input
text appears first in the list.
*)

%inline nonempty_concat(X):
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This code is not used but will be in #2544

xs = rev(reversed_nonempty_concat(X))
{ xs }

(* [reversed_separated_nonempty_llist(separator, X)] recognizes a nonempty list
of [X]s, separated with [separator]s, and produces an OCaml list in reverse
order -- that is, the last element in the input text appears first in this
Expand Down Expand Up @@ -3291,8 +3319,8 @@ with_type_binder:
/* Polymorphic types */

%inline typevar:
QUOTE mkrhs(ident)
{ $2 }
QUOTE ident
{ mkrhs $2 $sloc }
;
%inline typevar_list:
nonempty_llist(typevar)
Expand Down Expand Up @@ -3346,7 +3374,7 @@ alias_type:
function_type
{ $1 }
| mktyp(
ty = alias_type AS QUOTE tyvar = ident
ty = alias_type AS tyvar = typevar
{ Ptyp_alias(ty, tyvar) }
)
{ $1 }
Expand Down Expand Up @@ -3927,17 +3955,17 @@ attr_id:
) { $1 }
;
attribute:
LBRACKETAT attr_id payload RBRACKET
{ Attr.mk ~loc:(make_loc $sloc) $2 $3 }
LBRACKETAT attr_id attr_payload RBRACKET
{ mk_attr ~loc:(make_loc $sloc) $2 $3 }
;
post_item_attribute:
LBRACKETATAT attr_id payload RBRACKET
{ Attr.mk ~loc:(make_loc $sloc) $2 $3 }
LBRACKETATAT attr_id attr_payload RBRACKET
{ mk_attr ~loc:(make_loc $sloc) $2 $3 }
;
floating_attribute:
LBRACKETATATAT attr_id payload RBRACKET
LBRACKETATATAT attr_id attr_payload RBRACKET
{ mark_symbol_docs $sloc;
Attr.mk ~loc:(make_loc $sloc) $2 $3 }
mk_attr ~loc:(make_loc $sloc) $2 $3 }
;
%inline post_item_attributes:
post_item_attribute*
Expand Down Expand Up @@ -3977,4 +4005,10 @@ payload:
| QUESTION pattern { PPat ($2, None) }
| QUESTION pattern WHEN seq_expr { PPat ($2, Some $4) }
;
attr_payload:
payload
{ Builtin_attributes.mark_payload_attrs_used $1;
$1
}
;
%%
2 changes: 1 addition & 1 deletion vendor/parser-standard/parsetree.mli
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ and core_type_desc =
- [T #tconstr] when [l=[T]],
- [(T1, ..., Tn) #tconstr] when [l=[T1 ; ... ; Tn]].
*)
| Ptyp_alias of core_type * string (** [T as 'a]. *)
| Ptyp_alias of core_type * string loc (** [T as 'a]. *)
| Ptyp_variant of row_field list * closed_flag * label list option
(** [Ptyp_variant([`A;`B], flag, labels)] represents:
- [[ `A|`B ]]
Expand Down
2 changes: 1 addition & 1 deletion vendor/parser-standard/printast.ml
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ let rec core_type i ppf x =
line i ppf "Ptyp_class %a\n" fmt_longident_loc li;
list i core_type ppf l
| Ptyp_alias (ct, s) ->
line i ppf "Ptyp_alias \"%s\"\n" s;
line i ppf "Ptyp_alias \"%s\"\n" s.txt;
core_type i ppf ct;
| Ptyp_poly (sl, ct) ->
line i ppf "Ptyp_poly%a\n" typevars sl;
Expand Down
Loading