forked from vectordotdev/vector
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcheck-events
executable file
·510 lines (442 loc) · 18.3 KB
/
check-events
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
#!/usr/bin/env ruby
# coding: utf-8
require 'find'
# These members/tags are common to multiple events
BYTE_SIZE_COUNT = %w[byte_size count].freeze

# Conventions for the standard component events, keyed by event-name suffix:
# SUFFIX => [MESSAGE, COUNTERS, ADDITIONAL_TAGS]
#
# MESSAGE is the exact log message required, COUNTERS are the counter name
# stems (wrapped as "component_<stem>_total" by the checker) and
# ADDITIONAL_TAGS are the parameters the log call must carry.
# The table and each of its entries are frozen: this is read-only data.
EVENT_CLASSES = {
  'BytesReceived' => ['Bytes received.', %w[received_bytes], %w[byte_size protocol]],
  'EventsReceived' => ['Events received.', %w[received_events received_event_bytes], %w[count byte_size]],
  'EventsSent' => ['Events sent.', %w[sent_events sent_event_bytes], %w[count byte_size]],
  'BytesSent' => ['Bytes sent.', %w[sent_bytes], %w[byte_size protocol]]
}.each_value(&:freeze).freeze

# Counter names with dedicated validation rules.
METRIC_NAME_EVENTS_DROPPED = 'component_discarded_events_total'.freeze
METRIC_NAME_ERROR = 'component_errors_total'.freeze

# Event names that are excluded from the duplicate and validity checks.
SKIP_CHECK = %w[DefaultName].freeze
# Appends `item` to the array stored under `key` in `hash`, starting a new
# array when the key is absent. Returns the array that now holds `item`.
def hash_array_add(hash, key, item)
  bucket = hash.fetch(key) { [] }
  bucket.push(item)
  hash[key] = bucket
end
# Returns true when `name` looks like a constant tag value: either a
# double-quoted string literal, or a path ending in an upper-case constant
# such as `error_stage::SENDING`.
#
# The constant part must be allowed to span several characters; matching a
# single character only would reject every realistic constant name.
def is_constant?(name)
  (name.start_with?('"') && name.end_with?('"')) || name.match?(/^(.+::)[A-Z0-9_]+$/)
end
# Returns the 1-based line number on which the first occurrence of `needle`
# starts inside `haystack`, or nil when `needle` does not occur.
#
# The line number is the count of newlines before the match plus one. Counting
# the lines of the preceding text instead is off by one whenever the match
# starts at the very beginning of a line (and yields 0 for a match at offset 0).
def find_line_number(haystack, needle)
  idx = haystack.index(needle)
  return nil if idx.nil?

  haystack[0, idx].count("\n") + 1
end
# A class to hold error reports and common functionality.
#
# One Event collects what the scanner learned about a single internal event:
# its struct members, the metrics it emits, its log calls and how often it is
# used. `valid?` / `valid_with_handle?` then check that data against the
# naming, logging and metric conventions and record every violation in
# `reports`.
class Event
  attr_accessor :path, :skip_dropped_events, :uses, :skip_duplicate_check, :skip_validity_check,
                :impl_internal_event, :impl_register_event, :impl_event_handle
  attr_reader :name, :reports, :logs
  attr_writer :members

  def initialize(name)
    @path = nil
    @skip_duplicate_check = false
    @skip_validity_check = false
    @skip_dropped_events = false
    @emits_component_events_dropped = false
    @name = name
    @reports = []
    @members = {}
    @counters = {}
    @metrics = {}
    @logs = []
    @uses = 0
    @impl_internal_event = false
    @impl_register_event = false
    @impl_event_handle = false
  end

  # Records a metric of the given type ('counter', 'gauge' or 'histogram')
  # with its tag hash. Counters are additionally indexed by bare name, since
  # the validity checks only look at counters.
  def add_metric(type, name, tags)
    @metrics["#{type}:#{name}"] = tags
    @counters[name] = tags if type == 'counter'
  end

  # Scan for counter names and tags
  #
  # Note: the regexes deliberately use only the `m` flag (dot matches
  # newline). The `s` flag in Ruby selects the Windows-31J encoding and makes
  # matching fail on UTF-8 text that contains non-ASCII characters.
  def scan_metrics(block)
    block.scan(/ (counter|gauge|histogram)!\((?:\n\s+)?"([^"]+)",(.+?)\)[;\n]/m) do |type, name, tags|
      add_metric(type, name, parse_tags(tags))
    end
  end

  # Scan the registered event macro block
  #
  # event_fields  - text of the event struct fields (may be empty)
  # handle_fields - text of the handle fields, each `name: Type = initializer,`
  # data_type     - type of the value passed to `emit` (currently unused)
  # emit_block    - body of the `emit` function, scanned for log calls
  def scan_registered_event(event_fields, handle_fields, data_type, emit_block)
    @members = event_fields.scan(/^ *([a-z0-9_]+): *(.+?),$/m)
    handle_fields.scan(/^ *([a-z0-9_]+): *(.+?) *= *(.+?),$/m) do |_name, _type, assignment|
      scan_component_dropped_events(assignment)
      # This is a _slightly_ different regex than the one in scan_metrics, couldn't figure a way to unify them
      registration = assignment.match(/register_(counter|gauge|histogram)!\((?:\n\s+)?"([^"]+)"(,.+)?\)/m)
      next if registration.nil?

      # A MatchData is not destructured by block parameters, so the capture
      # groups have to be pulled out explicitly.
      type, name, tags = registration.captures
      add_metric(type, name, parse_tags(tags || ''))
    end
    scan_logs(emit_block)
  end

  # Records one log call as [type, message, parameters].
  def add_log(type, message, parameters)
    @logs.append([type, message, parameters])
  end

  # Scan for log outputs and their parameters
  def scan_logs(block)
    block.scan(/
      (trace|debug|info|warn|error)! # The log type
      \(\s*(?:message\s*=\s*)? # Skip any leading "message =" bit
      (?:"([^({)][^("]+)"|([^,]+)) # The log message text
      ([^;]*?) # Match the parameter list
      \)(?:;|\n\s*}) # Normally would end with simply ");", but some are missing the semicolon
    /mx) do |type, raw_message, var_message, parameters|
      # Parameters are either `name = value` assignments or `?name` / `%name` shorthands.
      parameters = parameters.scan(/([a-z0-9_]+) *= .|[?%]([a-z0-9_.]+)/)
                             .map { |assignment, simple| assignment || simple }
      add_log(type, raw_message || var_message, parameters)
    end
  end

  # Scan for the emission of ComponentEventsDropped.
  def scan_component_dropped_events(block)
    @emits_component_events_dropped = true if block.match?(/(emit|register)!\(\s*ComponentEventsDropped\b/)
  end

  # The event signature is used to check for duplicates and is
  # composed from the member names and their types, the metric types,
  # names, and their tags, and the log messages and parameters. If no
  # metrics and no logs are defined for the event, the signature is
  # `nil` to skip duplicate checking.
  def signature
    return nil if @metrics.empty? && @logs.empty?

    members = @members.map { |name, type| "#{name}:#{type}" }.sort.join(':')
    metrics = @metrics.map { |name, value| "#{name}(#{value.keys.sort.join(',')})" }.sort.join(';')
    logs = @logs.sort.join(';')
    "#{members}[#{logs}][#{metrics}]"
  end

  # Validates the event against its own log calls.
  def valid?
    valid_with_handle? self
  end

  # Validates the event, taking the log calls from `handle` (another Event, or
  # self). Violations are appended to `reports`; the log-level checks are
  # recorded on `handle`. Returns true when this event has no reports.
  def valid_with_handle?(handle)
    append('Event has no uses.') if @uses == 0

    check_standard_event_classes(handle)

    # NOTE: `one?` means "exactly one error log", not "at least one".
    has_error_logs = handle.logs.one? { |type, _, _| type == 'error' }
    is_events_dropped_event = @name.end_with?('EventsDropped') || @counters.include?(METRIC_NAME_EVENTS_DROPPED)

    # Validate <Name>Error events
    check_error_event(handle) if (has_error_logs && !is_events_dropped_event) || @name.end_with?('Error')

    # TODO remove @skip_dropped_events check logic after DroppedEvents audit is complete
    # (https://github.com/vectordotdev/vector/issues/13995)
    # Validate <Namespace>EventsDropped events
    check_events_dropped_event(handle) if is_events_dropped_event && !@skip_dropped_events

    check_counter_tag_constants

    @reports.empty?
  end

  # Reports a violation unless at least one log call uses one of `levels`.
  def log_level_one_of(levels)
    return if @logs.any? { |type, _message, _parameters| levels.include? type }

    append("This event MUST log with one of these levels: #{levels}.")
  end

  # Reports a violation unless at least one log call uses `level`.
  def log_level_exactly(level)
    log_level_one_of([level])
  end

  # Adds one violation message to the reports.
  def append(report)
    @reports.append(report)
  end

  private

  # Turns the text of a metric macro's tag list (`"key" => value, ...`) into a
  # hash of tag name to value expression.
  def parse_tags(text)
    Hash[text.scan(/"([^"]+)" => (.+?)(?:,|$)/)]
  end

  # Checks the conventions for the standard event classes (BytesReceived,
  # EventsSent, ...): trace-level log with a fixed message and tags, plus the
  # matching "component_<stem>_total" counters.
  def check_standard_event_classes(handle)
    EVENT_CLASSES.each do |suffix, (required_message, counters, additional_tags)|
      next unless @name.end_with? suffix

      handle.logs.each do |type, message, parameters|
        append('Log type MUST be "trace!".') if type != 'trace'
        if message != required_message
          append("Log message MUST be \"#{required_message}\" (is \"#{message}\").")
        end
        additional_tags.each do |tag_name|
          append("Log MUST contain tag \"#{tag_name}\"") unless parameters.include? tag_name
        end
      end
      counters.each do |counter|
        counters_must_include_exclude_tags("component_#{counter}_total", additional_tags - BYTE_SIZE_COUNT)
      end
    end
  end

  # Checks the conventions for <Name>Error events.
  def check_error_event(handle)
    # Name check
    append('Error events MUST be named "___Error".') unless @name.end_with? 'Error'
    # Outputs an error log
    handle.log_level_exactly('error')
    # Metric check
    counters_must_include_exclude_tags(METRIC_NAME_ERROR, ['error_type', 'stage'])
    # A missing counter has just been reported above; without it there is
    # nothing to compare the log parameters against.
    counter_tags = @counters[METRIC_NAME_ERROR]
    # Make sure Error events contain the required parameters
    handle.logs.each do |type, _message, parameters|
      next unless type == 'error'

      ['error_type', 'stage'].each do |parameter|
        unless parameters.include? parameter
          append("Error log for Error event MUST include parameter \"#{parameter}\".")
        end
      end
      next if counter_tags.nil?

      ['error_code', 'error_type', 'stage'].each do |parameter|
        if parameters.include?(parameter) && !counter_tags.include?(parameter)
          append("Counter \"#{METRIC_NAME_ERROR}\" must include \"#{parameter}\" to match error log.")
        end
      end
    end
  end

  # Checks the conventions for <Namespace>EventsDropped events.
  def check_events_dropped_event(handle)
    # Don't run the checks on event structs which themselves emit ComponentEventsDropped,
    # as the ComponentEventsDropped event is already checked.
    # Instead, verify that component_discarded_events_total is not being over-incremented.
    if @emits_component_events_dropped
      if @counters.include? METRIC_NAME_EVENTS_DROPPED
        append("Event emitting ComponentEventsDropped should not also increment counter `#{METRIC_NAME_EVENTS_DROPPED}`")
      end
      return
    end

    # Name check
    append('EventsDropped events MUST be named "___EventsDropped".') unless @name.end_with? 'EventsDropped'
    # Outputs an error log or debug log. Which level is dependent on the value of the param `intentional`, however
    # because implementation can involve passing in the value of the `intentional` bool at compile time, we would need to
    # scan all the source code for places that emit this event to determine that.
    handle.log_level_one_of(['error', 'debug'])
    # Metric check
    counters_must_include_exclude_tags(METRIC_NAME_EVENTS_DROPPED, ['intentional'], ['reason', 'count'])
    # A missing counter has just been reported above; skip the comparison then.
    counter_tags = @counters[METRIC_NAME_EVENTS_DROPPED]
    # Make sure EventsDropped events contain the required parameters
    handle.logs.each do |type, _message, parameters|
      next unless type == 'error'

      ['count', 'intentional', 'reason'].each do |parameter|
        unless parameters.include? parameter
          append("Error log for EventsDropped event MUST include parameter \"#{parameter}\".")
        end
      end
      next if counter_tags.nil?

      ['intentional'].each do |parameter|
        if parameters.include?(parameter) && !counter_tags.include?(parameter)
          append("Counter \"#{METRIC_NAME_EVENTS_DROPPED}\" must include \"#{parameter}\" to match error log.")
        end
      end
    end
  end

  # Make sure the `stage` and `error_type` tags of the error and
  # discarded-events counters are given as the corresponding constants.
  def check_counter_tag_constants
    @counters.each do |name, tags|
      # Only component_errors_total and component_discarded_events_total metrics are considered
      next unless ['component_errors_total', 'component_discarded_events_total'].include? name

      tags.each do |tag, value|
        if tag == 'stage'
          unless value.start_with? 'error_stage::'
            append("Counter \"#{name}\" tag \"#{tag}\" value must be an \"error_stage\" constant.")
          end
        elsif tag == 'error_type'
          unless value.start_with? 'error_type::'
            append("Counter \"#{name}\" tag \"#{tag}\" value must be an \"error_type\" constant.")
          end
        end
      end
    end
  end

  # Reports a violation when counter `name` is missing, lacks one of
  # `required_tags`, or carries one of `exclude_tags`.
  def counters_must_include_exclude_tags(name, required_tags, exclude_tags = [])
    unless @counters.include? name
      append("This event MUST increment counter \"#{name}\".")
      return
    end

    tags = @counters[name]
    required_tags.each do |tag|
      append("Counter \"#{name}\" MUST include tag \"#{tag}\".") unless tags.include? tag
    end
    exclude_tags.each do |tag|
      append("Counter \"#{name}\" MUST NOT include tag \"#{tag}\".") if tags.include? tag
    end
  end
end
# Registry of every event encountered, keyed by event name. Looking up a name
# that is not present yet creates and stores a fresh Event for it.
$all_events = Hash::new { |hash, key| hash[key] = Event::new(key) }
# Running count of reported problems.
error_count = 0
# Scan sources and build internal structures
Find.find('./src', './lib') do |path|
# Drop the leading './' so the prefix checks below see paths such as 'src/...'.
if path.start_with? './'
path = path[2..]
end
# Only Rust source files are inspected.
if path.end_with? '.rs'
text = File.read(path)
# Count call sites: each `emit(`, `emit!(`, `register(` or `register!(` that is
# followed by a capitalized identifier (optionally behind a lowercase module
# path) counts as one use of the event with that name.
text.scan(/\b(?:emit!?|register!?)\((?:[a-z][a-z0-9_:]+)?([A-Z][A-Za-z0-9]+)/) \
do |event_name,|
$all_events[event_name].uses += 1
end
# Check log message texts for correct formatting.
if path.start_with? 'src/'
# Formatting problems found in this file; printed after the scan below.
reports = []
# Try to find all general usage of the various `tracing` macros.
text.scan(/(
(trace|debug|info|warn|error)!\( # Log type.
([^;]*?) # All parameters to the macro.
\)(?:;|\n\s*}) # Handles usages that lack a trailing semicolon.
)/mx) \
do |full, type, params|
# Extract each parameter to the macros, which involves handling structured fields and
# string literals. We parse them further below so that we can iterate through them to try
# and determine what the actual log message is, depending on if it's set by using the
# `message` field, or implicitly with a string literal.
#
# We also have some special handling in there for `tracing`-specific "target" and "parent"
# settings which influence how the event is handled when being processed by a subscriber,
# which we don't care about _here_ but need to account for in our pattern to parse things.
#
# Each parameter becomes a small hash: "litstr" for a bare string literal,
# "named_field" for anything containing `=` (split once into field and value),
# and "field" for everything else.
params = params.scan(/("(?:[^"\\]++|\\.)*+"|(?:target|parent):\s*[^,]+|(\w+\s*=\s*(?:"(?:[^"\\]++|\\.)*+"|[%?]?[^,]+))|[%?][^,]+)/) \
.map do |param|
if /^\".*\"$/.match?(param[0].strip)
{ "type" => "litstr", "value" => param[0] }
elsif param[0].include? "="
parts = param[0].split('=', 2).map { |part| part.strip }
{ "type" => "named_field", "field" => parts[0], "value" => parts[1] }
else
{ "type" => "field", "field" => param[0] }
end
end
# See if we found a message field.
message_param = params.find { |param|
# Use the first string literal parameter.
param["type"] == "litstr" ||
# Or the first named field called `message` that has a value that is a string literal.
(param["type"] == "named_field" && param["field"] == "message" && /^\".*\"$/.match?(param["value"]))
}
# We further scrutinize the message field, if we believe we found one. This lets us avoid
# scenarios where variable interpolation is being used, since we can't reasonably detect if
# an interpolated variable at the beginning or end of the message is capitalized or has a
# trailing period, respectively.
has_message = !message_param.nil?
# The message text with its surrounding double quotes removed.
message = if has_message then message_param["value"].gsub(/^"|"$/, '') else nil end
# Passes when there is no message, when it starts with `{` or with a non-letter,
# or when its first character is an upper-case letter.
is_capitalized = !has_message || (message[0] == "{" || !message.match?(/^[a-zA-Z]/) || message.match?(/^[[:upper:]]/))
# Passes when there is no message, when it ends with `}`, or when it ends with a period.
has_trailing_period = !has_message || (message[-1, 1] == "}" || message.match?(/\.$/))
match_reports = []
match_reports.append('Message must start with a capital.') unless is_capitalized
match_reports.append('Message must end with a period.') unless has_trailing_period
unless match_reports.empty?
# The line is looked up by searching for the matched text, so a call that
# appears verbatim more than once in the file resolves to its first occurrence.
line_no = find_line_number(text, full)
match_reports.each { |report| reports.push(" #{report} (`#{type}` call on #{path}:#{line_no})") }
end
end
unless reports.empty?
reports.each { |report| puts report }
error_count += reports.length
end
end
# TODO remove @skip_dropped_events check logic after DroppedEvents audit is complete
# (https://github.com/vectordotdev/vector/issues/13995)
# The marker is matched case-insensitively anywhere in the file.
skip_dropped_events = text.match? /## skip check-dropped-events ##/i
if (path.start_with? 'src/internal_events/' or path.start_with? 'lib/vector-common/src/internal_event/')
# Scan internal event structs for member names
# (`members` is nil for structs declared with a bare `;` instead of a body).
text.scan(/[\n ]struct (\S+?)(?:<.+?>)?(?: {\n(.+?)\n\s*}|;)\n/m) do |struct_name, members|
event = $all_events[struct_name]
event.path = path
event.skip_dropped_events = skip_dropped_events
if members
members = members.scan(/ ([A-Za-z0-9_]+): +(.+?),/).map { |member, type| [member, type] }
event.members = members.to_h
end
end
# Scan internal event implementation blocks for logs and metrics
# The `\1` backreference requires the closing brace to be preceded by the same
# whitespace that precedes the `impl` keyword.
text.scan(/^(\s*)impl(?:<.+?>)? (InternalEvent|RegisterInternalEvent|InternalEventHandle) for ([A-Za-z0-9_]+)(?:<.+?>)? {\n(.+?)\n\1}$/m) \
do |_space, trait, event_name, block|
event = $all_events[event_name]
event.path = path
# Opt-out markers are looked for inside the impl block itself.
event.skip_duplicate_check = block.match? /## skip check-duplicate-events ##/i
event.skip_validity_check = block.match? /## skip check-validity-events ##/i
if trait == 'InternalEvent'
# Look-aside internal events that defer their implementation to a registered event.
if ! block.include? 'register('
event.impl_internal_event = true
event.scan_metrics(block)
event.scan_logs(block)
event.scan_component_dropped_events(block)
end
elsif trait == 'RegisterInternalEvent'
# This is just a dummy name and will cause spurious errors, but it will at least surface
# the issue of using the macro.
event.impl_register_event = event_name
event.append("Do not implement RegisterInternalEvent manually. Use the registered_event! macro instead.")
elsif trait == 'InternalEventHandle'
event.impl_event_handle = true
event.scan_logs(block)
end
end
end
# Scan for the `registered_event` macro
# Captures, in order: the macro path prefix, the event name, the optional braced
# event fields (with and without braces), the handle fields, the type of the
# argument to `emit`, the body of `emit`, and the macro's closing delimiter.
text.scan(/^(crate::|vector_common::|)registered_event! *[({]\n *([A-Za-z0-9_]+) *({(.*?)})? *=> *{(.+?)}$.*^ *fn emit\(\&self, [a-z0-9_]+: ([A-Za-z0-9_]+)\) {$(.+?)}\n(\);|\})$/m) \
do |_, event_name, _, event_fields, handle_fields, data_type, emit_block, _|
event = $all_events[event_name]
event.path = path
# `event_fields` is nil when the macro declares no event fields.
event.scan_registered_event(event_fields || "", handle_fields, data_type, emit_block)
end
end
end
# Names of the events sharing each signature, keyed by signature.
$duplicates = Hash::new { |hash, key| hash[key] = [] }

# Prints the heading and the individual reports of an event that failed validation.
print_event_errors = lambda do |failed|
  puts "#{failed.path}: Errors in event #{failed.name}:"
  failed.reports.each { |report| puts " #{report}" }
end

$all_events.each do |name, event|
  next if SKIP_CHECK.include? name

  # Check for duplicated signatures
  if !event.skip_duplicate_check && (event.impl_internal_event || event.impl_event_handle)
    signature = event.signature
    $duplicates[signature].append(name) if signature
  end

  # Check events for validity
  next if event.skip_validity_check

  if event.impl_internal_event
    unless event.valid?
      print_event_errors.call(event)
      error_count += 1
    end
  elsif event.impl_register_event
    # impl_register_event holds the name of the event acting as the handle.
    handle = $all_events[event.impl_register_event]
    if handle
      unless event.valid_with_handle? handle
        print_event_errors.call(event)
        error_count += 1
      end
    else
      puts "Registered event #{event.name} references nonexistent handle #{event.impl_register_event}"
      error_count += 1
    end
  end
end

# Every signature shared by more than one event counts as a single error.
$duplicates.each_value do |names|
  next unless names.length > 1

  puts "Duplicate events detected: #{names.join(', ')}"
  error_count += 1
end

puts "#{error_count} error(s)"
exit 1 unless error_count.zero?