forked from jamiejennings/rosie-pattern-language
-
Notifications
You must be signed in to change notification settings - Fork 0
/
language-comments.rpl
141 lines (108 loc) · 5.46 KB
/
language-comments.rpl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
---- -*- Mode: rpl; -*-
----
---- language-comments.rpl Extract comments (or just code) from source files
----
---- © Copyright IBM Corporation 2016.
---- LICENSE: MIT License (https://opensource.org/licenses/mit-license.html)
---- AUTHOR: Jamie A. Jennings
-- USAGE:
-- You must use rosie's -wholefile command line option with these patterns.
-- EXAMPLE: extract comments
--
-- bash-3.2$ rosie -wholefile c.any_comment tmp/lpeg-1.0.0/lpvm.c
--
-- ** $Id: lpvm.c,v 1.6 2015/09/28 17:01:25 roberto Exp $
-- ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license)
-- initial size for call/backtrack stack
-- ** {======================================================
-- ** Virtual Machine
-- ** =======================================================
-- saved position (or NULL for calls) next instruction
-- ** Double the size of the array of captures
-- [...]
-- EXAMPLE: extract code
--
-- bash-3.2$ rosie -wholefile c.code_only tmp/lpeg-1.0.0/lpvm.c
--
-- #include <limits.h>
-- #include <string.h>
--
-- #include "lua.h"
-- #include "lauxlib.h"
-- [...]
-- require "csv"
-- Shared building blocks used by every language section below.
-- The csv.* names are not defined in this file; presumably they come from the csv patterns named
-- in the commented-out require above -- confirm that those patterns are loaded first.
-- NOTE(review): dquoted_field and squoted_field are not referenced elsewhere in the visible file.
alias dquoted_field = csv.dquoted_field_contents
alias squoted_field = csv.squoted_field_contents
-- A string literal in either quoting style; the double-quoted form is tried first.
alias string_constant = csv.dquoted_field / csv.squoted_field
-- Body of a line comment: every character up to, but not including, the next newline.
comment = {!"\n" .}* -- terminates at newline or end of input
-- A '#' line comment: optional leading whitespace, the '#', the comment body, and an optional
-- trailing newline.
alias hash_comment = {[:space:]* "#" comment "\n"?} -- note: eats newline if not at end of input
-- Text to step over before a '#' comment: whole string constants (so that a '#' inside a string
-- is not taken for a comment) or any single character other than '#'.
alias hash_skip = { string_constant / {!"#" .} }*
----------------------------------------------------------------------------------------
-- Ruby
----------------------------------------------------------------------------------------
-- Useful patterns to search for: ruby.line_comments_only, ruby.any_comment
-- FIXME:
-- N.B. These patterns do not detect Ruby's "here documents" (heredocs), which are a long form of
-- string literal. Thus, if a heredoc contains something that looks like a comment, it will be
-- matched by the comment patterns.
-- Zero or more consecutive '#' comments, starting at the current position.
alias ruby.line_comments_only = hash_comment*
-- Zero or more '#' comments anywhere in the input: step over the text before each comment
-- (treating string constants as units), then match the comment itself.
alias ruby.any_comment = { hash_skip hash_comment }*
----------------------------------------------------------------------------------------
-- Python
----------------------------------------------------------------------------------------
-- Useful patterns to search for: py.line_comments_only, py.any_comment, py.long_strings_only, py.code_only
-- Important note:
--
-- Long (multi-line) strings in Python are delimited by triple quotes (single or double). When a
-- long string appears at the start of a file or immediately after a definition, it is considered a
-- "docstring", which is a type of comment. However, long strings can occur in regular Python code
-- as multi-line string literals. So, whether a long string should be considered code or comment
-- depends on its context. We could write some patterns to distinguish between these two cases, but
-- for now we will consider all long strings to be comments.
-- Marker that opens or closes a long string: three double quotes or three single quotes.
alias py.long_string_delimeter = { "\"\"\"" / "'''" }
-- Text of a long string: every character up to the next delimiter of either kind.
py.long_string_contents = { !py.long_string_delimeter . }*
-- A complete long string, delimiters included.
-- NOTE(review): the closing delimiter is not required to be the same kind as the opening one.
alias py.long_string = { py.long_string_delimeter py.long_string_contents py.long_string_delimeter }
-- Characters to step over: anything that starts neither a long string nor a '#' comment.
alias py.any_skip = {!py.long_string_delimeter !hash_comment .}*
-- Zero or more comments of either kind (long string or '#' comment), each preceded by skipped text.
alias py.any_comment = { py.any_skip {py.long_string / hash_comment} }*
-- Comment text to drop when extracting code: a whole long string (delimiters included), or a '#'
-- comment up to, but not including, its newline.
alias py.comments_to_skip =
{ {py.long_string_delimeter {!py.long_string_delimeter .}* py.long_string_delimeter}
/
{"#" {!"\n" .}* }
}
-- A single '#' comment. NOTE(review): this name is bound again at the end of this section.
py.line_comments_only = hash_comment
-- NOTE(review): string constants are not stepped over here, so a '#' inside an ordinary string
-- literal ends the code run just as a comment would.
py.code = {!py.long_string_delimeter !"#" .}+ -- keep chars up to a comment
-- Top level patterns to use:
alias py.code_only = { py.comments_to_skip / py.code}* -- discard comments, keep code
-- Walks the input one step at a time, matching a long string where one starts and otherwise
-- consuming a single character.
py.long_strings_only = {py.long_string / .}*
-- Rebinds py.line_comments_only: zero or more repetitions of (a run of non-'#' characters, then the
-- pattern named py.line_comments_only).
-- NOTE(review): the right-hand side mentions the name being defined; presumably it resolves to the
-- earlier single-comment binding -- confirm against Rosie's reassignment rules.
-- NOTE(review): this is the only binding in the file that groups with ( ) rather than { } -- confirm
-- that the difference is intended.
py.line_comments_only = ( [^#]* py.line_comments_only )*
----------------------------------------------------------------------------------------
-- Perl
----------------------------------------------------------------------------------------
-- Useful patterns to search for: pl.line_comments_only, pl.any_comment
-- Zero or more consecutive '#' comments, starting at the current position.
alias pl.line_comments_only = hash_comment*
-- Zero or more '#' comments anywhere in the input: step over the text before each comment
-- (treating string constants as units), then match the comment itself.
alias pl.any_comment = { hash_skip hash_comment }*
----------------------------------------------------------------------------------------
-- JavaScript, Java, C, and more
----------------------------------------------------------------------------------------
-- Useful patterns to search for: js.line_comments_only, js.block_comment, js.any_comment,
-- js.code_only, c.any_comment, c.code_only
-- Comment delimiters: "//" opens a line comment; "/*" and "*/" bracket a block comment.
alias js.comment_char = "//"
alias js.start_comment = "/*"
alias js.end_comment = "*/"
-- Either way of opening a comment; "//" is tried first.
alias js.any_comment_start = js.comment_char / js.start_comment
-- Everything before the next "/*".
-- NOTE(review): unlike js.skip, this does not step over string constants or "//" comments.
alias js.block_comment_skip = {!js.start_comment .}*
-- Text inside a block comment, up to but not including the closing "*/".
js.comment_block = {!js.end_comment .}*
-- A complete block comment, both delimiters included.
alias js.block_comment_full = { js.start_comment
js.comment_block
js.end_comment
}
-- Text to step over before a comment: whole string constants (so that a comment opener inside a
-- string is ignored) or any single character that does not begin a comment.
alias js.skip = { string_constant / {!js.any_comment_start .} }*
-- One or more block comments, each preceded by the text skipped to reach it.
alias js.block_comment = { js.block_comment_skip js.block_comment_full }+
-- One or more consecutive "//" comments, each with optional leading whitespace and an optional
-- trailing newline.
alias js.line_comments_only = {[:space:]* js.comment_char comment "\n"?}+
-- One or more comments of either kind, each preceded by skipped text. Because of the "+", this
-- does not match input that contains no comment at all.
alias js.any_comment = { js.skip {js.line_comments_only / js.block_comment_full} }+
-- Comment text to drop when extracting code: a whole block comment (delimiters included), or a
-- line comment up to, but not including, its newline.
alias js.comments_to_skip =
   {js.start_comment {!js.end_comment .}* js.end_comment} / {js.comment_char {!"\n" .}*}
-- A run of code, kept up to the next comment opener.  String constants are consumed whole, exactly
-- as js.skip does for comment extraction, so that "//" or "/*" inside a string literal (for
-- example "http://example.com") is kept as code instead of being treated as a comment.
-- NOTE(review): this relies on string_constant matching the language's string literals; an
-- unterminated quote simply falls through to the single-character alternative.
js.code = { string_constant / {!js.start_comment !js.comment_char .} }+
-- Top level pattern: alternate between dropping a comment and keeping a run of code.
alias js.code_only = { js.comments_to_skip / js.code}*     -- discard comments, keep code
-- C uses the same comment syntax, so the c.* names are bound directly to the js.* patterns.
-- These are the names used in the usage examples at the top of this file.
c.any_comment = js.any_comment
c.code_only = js.code_only