grammar ClojureNamedElements;

// Whole-input rule: zero or more expressions, then end of input.
translationunit : expression* EOF ;

// One top-level item. The order of alternatives is significant: when several
// alternatives can match the same input, ANTLR picks the one listed first.
// `anything` is last and consumes a single non-parenthesis token.
expression
    : comment_form
    | simple_top_level_element
    | namespace_declaration
    | defined_top_level_element
    | defrecord_form
    | top_level_form
    | anything
    ;

// Entry point for test specific code health metrics:
// Zero or more test-specific expressions, then end of input.
test_specific_scope : test_specific_expression* EOF ;

// Entry point for parsing function arguments; we need to handle defrecord method implementations
// differently.
fn_arg_scope
    : fn_arg_expression* EOF
    ;

// `implemented_method` is tried before the general `expression` so that a
// method implementation of the form (name [args] body...) is recognised as such.
fn_arg_expression
    : implemented_method
    | expression
    ;

// Either a run of assertions or any single token (`.` is the parser wildcard).
test_specific_expression
    : assertion_blocks
    | .
    ;

// One or more consecutive assertion statements.
assertion_blocks
    : assertion_statement+
    ;

// A parenthesised form whose first token is IS, e.g. (is (= 1 x)).
assertion_statement
    : LeftParen IS forms RightParen
    ;

// Any single token except an opening or closing parenthesis.
anything
    : ~( LeftParen | RightParen )
    ;

// A parenthesised form that starts with the `ns` keyword.
namespace_declaration
    : LeftParen ns_keyword forms RightParen
    ;

// Used to filter out top level named elements that aren't of interest.
// Shape: ( <def-keyword> <optional metadata> <name> <exactly one simple_form> )
simple_top_level_element
    : LeftParen def_keyword metadata? name simple_form RightParen
    ;

// Plain functions or macros:
// Shape: ( <def-keyword> <optional metadata> <name> <optional function context> <one or more forms> )
defined_top_level_element
    : LeftParen def_keyword metadata? name function_contexts? forms RightParen
    ;
// Variation for defrecord:
// (defrecord Name [fields] OptionalInterface (method [args] body...)+)
// At most one interface symbol is accepted, and at least one method is required.
defrecord_form
    : LeftParen DEFRECORD name defrecord_fields implemented_interface? implemented_methods RightParen
    ;

// The record's field vector; it may be empty.
defrecord_fields
    : LEFT_SQUARE argument_list? RIGHT_SQUARE
    ;

implemented_interface
    : SYMBOL
    ;

implemented_methods
    : implemented_method+
    ;

// A single method implementation: (name [args] body...)
implemented_method
    : LeftParen name arguments implemented_method_forms RightParen
    ;

// Non-greedy and possibly empty: a method can have an empty body.
implemented_method_forms
    : form*?
    ;

// What may follow the name of a defined element: either the plain-function
// shape or the multi-method shape.
function_contexts
    : plain_function
    | multi_method
    ;

// Optional doc string, optional metadata map, then the argument vector.
plain_function
    : doc_string? function_metadata? arguments
    ;

function_metadata
    : map
    ;

// A leading simple_form or vector, then an optional method name, an optional
// doc string, and the argument vector.
multi_method
    : ( simple_form | vector ) optional_method_name doc_string? arguments
    ;

// Example: (defmethod foo "a" name-of-method [params] "was a")
optional_method_name
    : SYMBOL?
    ;

// A doc string is a single string literal token.
doc_string
    : LITERAL
    ;

// A square-bracketed argument vector; it may be empty.
arguments
    : LEFT_SQUARE argument_list? RIGHT_SQUARE
    ;

// One or more arguments, written right-recursively; a comma may optionally
// separate an argument from the rest of the list.
// NOTE(review): a ',' token is also accepted by `argument` (through
// argument_form_anything), so this rule is ambiguous for inputs containing
// commas; which parse wins depends on the alternative order below.
argument_list: argument
             | argument ','? argument_list;

// A single argument: optional type hint, optional '&', then either an
// argument_form or a plain SYMBOL.
argument
    : TYPE_HINT? AMPERSAND? ( argument_form | SYMBOL )
    ;

// An argument can be a map, a vector, or any other simple form.
argument_form
    : map
    | vector
    | simple_form
    ;

// Collection literals. Vector, list and set bodies use non-greedy repetition;
// the map body is greedy.

// [ ... ]
vector
    : LEFT_SQUARE map_form*? RIGHT_SQUARE
    ;

// ( ... )
list
    : LeftParen map_form*? RightParen
    ;

// #{ ... } -- elements are restricted to simple_form
set
    : SET_CURLY simple_form*? RIGHT_CURLY
    ;

// { ... }
map
    : LEFT_CURLY map_form* RIGHT_CURLY
    ;
// An element nested inside a vector, list or map.
//
// `set` must be an explicit alternative: the catch-all map_form_anything accepts
// the opening SET_CURLY token but rejects the closing RIGHT_CURLY, so without it
// a set literal nested in a collection (e.g. [#{1 2}] or {:a #{:b}}) prevents
// the enclosing collection from being parsed.
map_form: map
        | vector
        | set
        | complexity_inducing_form // order of precedence is important -- needs to come before the list
        | list
        | map_form_anything;
// Alias for argument_form_anything, used inside collection bodies.
map_form_anything
    : argument_form_anything
    ;

// Any single token that is not a parenthesis, curly brace or square bracket.
// SET_CURLY ('#{') is not in the excluded set, so it is accepted here.
argument_form_anything
    : ~( LeftParen
       | RightParen
       | RIGHT_CURLY
       | LEFT_CURLY
       | RIGHT_SQUARE
       | LEFT_SQUARE
       )
    ;

// One or more forms.
forms
    : form+
    ;

// A single form. Alternatives are tried in the listed order when ambiguous;
// `anything` is the single-token fallback.
form
    : comment_form
    | complexity_inducing_form
    | lambda_fn
    | complex_form
    | empty_list
    | anything
    ;

// ()
empty_list
    : LeftParen RightParen
    ;

// A collection literal other than a list, or a single non-bracket token.
simple_form
    : vector
    | map
    | set
    | argument_form_anything
    ;

// A parenthesised form whose head is one of the complexity_element tokens and
// that has at least one further form.
complexity_inducing_form
    : LeftParen complexity_element form+ RightParen
    ;

// Head tokens recognised by complexity_inducing_form.
complexity_element
    : IF
    | IF_LET
    | IF_NOT
    | WHEN
    | WHEN_LET
    | WHEN_NOT
    | UNLESS
    | COND
    | CASE
    | DO_SEQ
    | DO_TIMES
    | LOOP
    | RECUR
    | FOR
    | CATCH
    | THROW
    | FINALLY
    | AND
    | OR
    ;

// (fn [args] body...) -- the argument vector must directly follow `fn`, and
// at least one body form is required.
lambda_fn
    : LeftParen FN arguments form+ RightParen
    ;

// Two comment shapes:
//   (comment ...)  -- a parenthesised form headed by COMMENT
//   #_ form        -- the reader-macro token followed by exactly one form
comment_form
    : LeftParen COMMENT form*? RightParen
    | READER_MACRO_COMMENT form
    ;

// Either a simple_form or a non-empty parenthesised sequence of forms.
complex_form
    : simple_form
    | LeftParen form+ RightParen
    ;

// A complex_form appearing directly at the top level.
top_level_form
    : complex_form
    ;

// Thin wrappers that give individual tokens their own parse-tree nodes.

name
    : SYMBOL
    ;

// 'ns' has no named lexer rule in this grammar; it is matched as a literal.
ns_keyword
    : 'ns'
    ;

def_keyword
    : DEF
    ;

metadata
    : METADATA
    ;

// NOTE: \\ must be here too, not just \", otherwise ANTLR would try to consume too much,
// extending the string up to the next " it encounters anywhere in the file!
// This is most problematic in languages like Clojure, which allow strings to span multiple lines.
// see https://app.clickup.com/t/9015696197/CS-2350
// ESCAPED matches two characters: a backslash followed by a backslash, or a
// backslash followed by a double quote. (Inside an ANTLR string literal each
// backslash is itself written as \\.)
fragment ESCAPED :  '\\\\' | '\\"';
// A double-quoted string: an opening quote, a non-greedy run of escape pairs or
// non-quote characters (line breaks included), and a closing quote.
LITERAL : '"' ( ESCAPED | ~('"') )*? '"';

// A single character wrapped in single quotes, e.g. 'a'; dropped from the token stream.
LITERAL_CHAR : '\'' . '\'' -> skip;

// Clojure character literals: a backslash followed either by a run of
// alphanumerics (\a, \newline, \u0041) or by any single character (\", \(, \)).
// Without this token the backslash lexes as ANY_CHAR and the next character is
// lexed on its own: \" would open a bogus LITERAL that swallows code up to the
// next quote, and \( or \) would unbalance the parentheses seen by the parser.
// The token is emitted (not skipped) so that a form such as (def sep \,) still
// carries a value token; it is accepted by `anything` and argument_form_anything.
CHARACTER_LITERAL : '\\' ( [a-zA-Z0-9]+ | . );

// Bracket tokens.
LeftParen    : '(';
RightParen   : ')';

LEFT_SQUARE  : '[';
RIGHT_SQUARE : ']';

LEFT_CURLY   : '{';
RIGHT_CURLY  : '}';

// Opening delimiter of a set literal; the closing delimiter is RIGHT_CURLY.
SET_CURLY    : '#{';

// A semicolon up to (not including) the end of the line; dropped from the token stream.
LineComment  : ';' ~[\r\n]* -> skip;

// Keyword-style metadata: '^:' followed by one or more name characters, e.g. ^:private.
// Map-style metadata (^{...}) is not matched by this token.
METADATA: '^:'[*+!\-?><a-zA-Z0-9_]+;
// A type hint: '^' followed by one or more of letters, digits, '_', '.', '-' or '<', e.g. ^String.
TYPE_HINT: '^'[\-<a-zA-Z0-9_.]+;

// Reserved words. They are declared ahead of SYMBOL, so an exact match lexes as
// the reserved token; a longer identifier such as `iffy` still lexes as SYMBOL
// because the lexer prefers the longest match.
FN                   : 'fn';

IS                   : 'is';

IF                   : 'if';
IF_LET               : 'if-let';
IF_NOT               : 'if-not';
WHEN                 : 'when';
WHEN_LET             : 'when-let';
WHEN_NOT             : 'when-not';
UNLESS               : 'unless';
COND                 : 'cond';
CASE                 : 'case';
DO                   : 'do';
DO_SEQ               : 'doseq';
DO_TIMES             : 'dotimes';
LOOP                 : 'loop';
RECUR                : 'recur';
FOR                  : 'for';
CATCH                : 'catch';
THROW                : 'throw';
FINALLY              : 'finally';

// Logical operators
AND                  : 'and';
OR                   : 'or';

COMMENT              : 'comment';
READER_MACRO_COMMENT : '#_';

// Declaration order matters here: DEF also matches the text 'defproject' and
// 'defrecord' at the same length, and ANTLR breaks such ties in favour of the
// rule declared first, so these two must stay above DEF.
DEFPROJECT: 'defproject'; // ignore these to avoid false positives
DEFRECORD: 'defrecord'; // parse separately to avoid treating the whole form as one function

// 'def' followed by zero or more name characters (no digits, no '<'): def, defn,
// defn-, defmacro, defmethod, ...
// NOTE(review): because DEF is declared before SYMBOL, any identifier that starts
// with 'def' and uses only these characters (e.g. `default`) also lexes as DEF.
DEF: 'def'[*+!\-_?>a-zA-Z]*;

// One or more colons followed by one or more name characters, e.g. :foo or ::foo.
// '/' and '.' are not in the character class, so a namespaced keyword such as
// :a/b is split into several tokens.
KEYWORD: ':'+ [*+!\-_?><a-zA-Z0-9]+;

// A Clojure symbol. The Clojure reader allows *, +, !, -, _, ', ?, <, > and = in
// symbols in addition to alphanumerics, so:
//   - '<' and '=' are accepted in any position (e.g. <sub, <!, not=, =>);
//   - the single quote is accepted only after the first character (e.g. foo'),
//     because a leading quote is the quote reader macro, not part of the symbol.
// '.' and '/' are deliberately left out; '/' is handled by SCOPED_SYMBOL.
SYMBOL : [*+!\-_?<>=a-zA-Z][*+!\-_?<>='a-zA-Z0-9]*;

// Namespace separator.
SCOPER        : '/';
// Two or more symbols joined by '/', e.g. str/join.
SCOPED_SYMBOL : SYMBOL ( SCOPER SYMBOL )+;

AMPERSAND  : '&';

// Spaces, tabs and line breaks are dropped from the token stream.
Whitespace : [ \t]+ -> skip;
NEWLINE    : '\r'? '\n' -> skip;

// Single-character fallback for anything no other rule matches.
ANY_CHAR   : .; // Put this lexer rule last to give it the lowest precedence
