forked from rouge-ruby/rouge
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request rouge-ruby#655 from abalkin/issue-547
Issue 547: Lexer for kdb+
- Loading branch information
Showing
4 changed files
with
216 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
/ comment | ||
x: til 10 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,124 @@ | ||
# frozen_string_literal: true

module Rouge
  module Lexers
    # Lexer for q, the programming language of kdb+ (kx.com).
    #
    # State overview:
    # * :root   - consumes an optional shebang line, then pushes :top
    # * :top    - handles leading indented lines and blank lines, then pushes :base
    # * :base   - the main grammar (names, comments, literals, operators)
    # * :string - the inside of a double-quoted string
    # * :bottom - the remainder of the input after a "til EOF" marker
    class Q < RegexLexer
      title 'Q'
      desc 'The Q programming language (kx.com)'
      tag 'q'
      aliases 'kdb+'
      filenames '*.q'
      mimetypes 'text/x-q', 'application/x-q'

      # An optional leading dot, one letter, then letters, digits, '_' or '.'.
      identifier = /\.?[a-z][a-z0-9_.]*/i

      # @return [Array<String>] words highlighted as Keyword
      def self.keywords
        @keywords ||= %w[do if while select update delete exec from by]
      end

      # @return [Array<String>] words highlighted as Operator::Word
      def self.word_operators
        @word_operators ||= %w[
          and or except inter like each cross vs sv within where in asof bin binr cor cov cut ej fby
          div ij insert lj ljf mavg mcount mdev mmax mmin mmu mod msum over prior peach pj scan scov setenv ss
          sublist uj union upsert wavg wsum xasc xbar xcol xcols xdesc xexp xgroup xkey xlog xprev xrank
        ]
      end

      # @return [Array<String>] words highlighted as Name::Builtin
      def self.builtins
        @builtins ||= %w[
          first enlist value type get set count string key max min sum prd last flip distinct raze neg
          desc differ dsave dev eval exit exp fills fkeys floor getenv group gtime hclose hcount hdel hopen hsym
          iasc idesc inv keys load log lsq ltime ltrim maxs md5 med meta mins next parse plist prds prev rand rank ratios
          read0 read1 reciprocal reverse rload rotate rsave rtrim save sdev show signum sin sqrt ssr sums svar system
          tables tan til trim txf ungroup var view views wj wj1 ww
        ]
      end

      # Returns 0 for every input.
      # NOTE(review): presumably this opts the lexer out of content-based
      # guessing - confirm against the Rouge version in use.
      #
      # @param _text [String] source text (unused)
      # @return [Integer] always 0
      def self.analyze_text(_text)
        0
      end

      # Regexes passed to a paren-less `rule` call use %r/.../ so Ruby does not
      # warn about an ambiguous first argument.

      state :root do
        # q allows a file to start with a shebang
        rule %r/#!(.*?)$/, Comment::Preproc, :top
        rule %r//, Text, :top
      end

      state :top do
        # indented lines at the top of the file are ignored by q
        rule %r/^[ \t\r]+.*$/, Comment::Special
        rule %r/\n+/, Text
        rule %r//, Text, :base
      end

      state :base do
        rule %r/\n+/m, Text
        # a single character followed by ")" at the start of a line, e.g. "k)"
        rule(/^.\)/, Keyword::Declaration)

        # Identifiers, word operators, etc.
        rule %r/#{identifier}/ do |m|
          word = m[0]
          if self.class.keywords.include?(word)
            token Keyword
          elsif self.class.word_operators.include?(word)
            token Operator::Word
          elsif self.class.builtins.include?(word)
            token Name::Builtin
          elsif /^\.[zQqho]\./ =~ word
            # names under the .z, .Q, .q, .h and .o namespaces
            token Name::Constant
          else
            token Name
          end
        end

        # White space and comments
        rule(%r{[ \t\r]\/.*$}, Comment::Single)
        rule(/[ \t\r]+/, Text::Whitespace)
        # a block opened by a lone "/" line and closed by a lone "\" line
        rule(%r{^/$.*?^\\$}m, Comment::Multiline)
        # a "/" comment at the left margin, continued on following indented lines
        rule(%r{^\/[^\n]*$(\n[^\S\n]+.*$)*}, Comment::Multiline)
        # til EOF comment
        rule(/^\\$/, Comment, :bottom)
        rule(/^\\\\\s+/, Keyword, :bottom)

        # Literals
        ## strings
        rule(/"/, Str, :string)
        ## timespan/stamp constants
        rule(/(?:\d+D|\d{4}\.[01]\d\.[0123]\d[DT])(?:[012]\d:[0-5]\d(?::[0-5]\d(?:\.\d+)?)?|([012]\d)?)[zpn]?\b/,
             Literal::Date)
        ## time/minute/second constants
        rule(/[012]\d:[0-5]\d(?::[0-5]\d(\.\d+)?)?[uvtpn]?\b/, Literal::Date)
        ## date constants
        rule(/\d{4}\.[01]\d\.[0-3]\d[dpnzm]?\b/, Literal::Date)
        ## special values
        rule(/0[nNwW][hijefcpmdznuvt]?/, Keyword::Constant)

        # operators to match before numbers
        rule(%r{'|\/:|\\:|':|\\|\/|0:|1:|2:}, Operator)

        ## numbers
        # The float rule below accepts any run of digits, so the narrower
        # boolean and integer rules must be tried first; otherwise they can
        # never match.
        ## boolean vectors: the "b" suffix is required
        rule(/[01]+b\b/, Num)
        ## integers: digits that do not continue into a fraction, an exponent
        ## or an e/f suffix, with an optional h/i/j suffix
        rule(/\d+(?![\d.ef])[hij]?/, Num::Integer)
        ## everything else that starts like a number
        rule(/(?:\d+(?:\.\d*)?|\.\d+)(?:e[+\-]?\d+|\d+\.\d*|\.\d+)?[ef]?/, Num::Float)
        ## symbols and paths
        rule(%r{(`:[:a-z0-9._\/]*|`(?:[a-z0-9.][:a-z0-9._]*)?)}i, Str::Symbol)
        rule(/(?:<=|>=|<>|::)|[?:$%&|@._#*^\-+~,!><=]:?/, Operator)

        rule %r/[{}\[\]();]/, Punctuation

        # commands
        # NOTE(review): this rule looks unreachable - the operator rule above
        # already matches a lone backslash, and it is listed first. Confirm the
        # intended handling of "\cmd" lines before reordering.
        rule(/\\.*\n/, Text)
      end

      state :string do
        rule(/"/, Str, :pop!)
        # \\, \n, \r, or a three-digit octal escape
        rule %r/\\([\\nr]|[01][0-7]{2})/, Str::Escape
        rule %r/[^\\"\n]+/, Str
        rule %r/\\/, Str # stray backslash
      end

      state :bottom do
        # everything up to the end of the input is one comment token
        rule %r/.*\z/m, Comment::Multiline
      end
    end
  end
end
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
# Guessing specs for the Q lexer: it should be selected by the '*.q'
# filename pattern and by each of its registered mimetypes.
describe Rouge::Lexers::Q do
  let(:subject) { Rouge::Lexers::Q.new }

  describe 'guessing' do
    include Support::Guessing

    it 'guesses by filename' do
      assert_guess filename: 'foo.q'
    end

    it 'guesses by mimetype' do
      %w[text/x-q application/x-q].each do |mimetype|
        assert_guess mimetype: mimetype
      end
    end
  end
end
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
#! /usr/bin/env q | ||
|
||
Demo for the Q programming language (kx.com) | ||
|
||
x:til 10 | ||
|
||
/ | ||
|
||
Multi-line | ||
comment | ||
|
||
\ | ||
|
||
/ Line comments that start at the left margin | ||
can be continued on indented lines. | ||
f:{ / but this comment will not continue | ||
x + y} | ||
|
||
/ Over and scan | ||
f/1 2 3 | ||
f\1 2 3 | ||
|
||
do|if|while|select|update|delete|exec|from|by | ||
|
||
div|ij|insert|lj|ljf|mavg|mcount|mdev|mmax|mmin|mmu|mod|msum|over|prior|peach|pj|scan | ||
|
||
iasc|idesc|inv|keys|load|log|lsq|ltime|ltrim|maxs|md5|meta|mins|next|parse|plist|prds|prev|rand|rank|ratios | ||
|
||
0nh 0Nc 0wt 0Wu | ||
|
||
/ Operators | ||
(!;@;#;$;%;^;&;*) | ||
(!:;@:;#:;$:;%:;^:;&:;*:) | ||
(0:;1:;2:) | ||
(';/;\;':;;/:;\:) | ||
|
||
/ System namespaces | ||
(.z.i;.Q.K) | ||
|
||
/ Strings | ||
("";"x\n";"\007") | ||
|
||
|
||
10D 10D10 10D10:10 10D10:10:10 10D10:10:10.11 | ||
10Dz 10D10p 10D10:10n 10D10:10:10z 10D10:10:10.11p | ||
|
||
2001.10.10D 2001.10.10D10 2001.10.10D10:10 2001.10.10D10:10:10 2001.10.10D10:10:10.11 | ||
2001.10.10Dz 10D10p 2001.10.10D10:10n 2001.10.10D10:10:10z 2001.10.10D10:10:10.11p | ||
|
||
10:10 10:10:10 10:10:10.1111 | ||
10:10u 10:10:10u 10:10:10.1111u | ||
10:10v 10:10:10v 10:10:10.1111v | ||
10:10p 10:10:10p 10:10:10.1111p | ||
10:10t 10:10:10t 10:10:10.1111t | ||
10:10n 10:10:10n 10:10:10.1111n | ||
|
||
2001.10.10 2001.10.10d 2001.10.10p 2001.10.10n 2001.10.10z 2001.10.10m | ||
|
||
10 10.1 10e10 10e-10 .1 .1e+10 | ||
10e 10.1e 10e10e 10e-10e .1e .1e+10e | ||
10f 10.1f 10e10f 10e-10f .1f .1e+10f | ||
|
||
`sym`a_b | ||
`:path`:/a/b/c | ||
|
||
k)x:!5 | ||
|
||
\ | ||
til eof comment | ||
/ | ||
no effect | ||
\ | ||
no effect |