From 1cd7b7b90e408d1a82df218f807ee9c25ae08411 Mon Sep 17 00:00:00 2001 From: Grzegorz Bizon Date: Tue, 8 Mar 2016 13:57:19 +0100 Subject: [PATCH 1/4] Add initial version of GFM Abstract Syntax Tree [ci skip] Conflicts: lib/gitlab/gfm/reference_unfolder.rb --- lib/gitlab/gfm/ast.rb | 16 ++++ lib/gitlab/gfm/ast/lexer.rb | 82 +++++++++++++++++ lib/gitlab/gfm/ast/parser.rb | 21 +++++ lib/gitlab/gfm/ast/syntax/content.rb | 20 +++++ .../gfm/ast/syntax/markdown/code_block.rb | 27 ++++++ lib/gitlab/gfm/ast/syntax/node.rb | 88 +++++++++++++++++++ lib/gitlab/gfm/ast/syntax/text.rb | 20 +++++ spec/lib/gitlab/gfm/ast/lexer_spec.rb | 5 ++ spec/lib/gitlab/gfm/ast/parser_spec.rb | 30 +++++++ spec/lib/gitlab/gfm/ast_spec.rb | 10 +++ 10 files changed, 319 insertions(+) create mode 100644 lib/gitlab/gfm/ast.rb create mode 100644 lib/gitlab/gfm/ast/lexer.rb create mode 100644 lib/gitlab/gfm/ast/parser.rb create mode 100644 lib/gitlab/gfm/ast/syntax/content.rb create mode 100644 lib/gitlab/gfm/ast/syntax/markdown/code_block.rb create mode 100644 lib/gitlab/gfm/ast/syntax/node.rb create mode 100644 lib/gitlab/gfm/ast/syntax/text.rb create mode 100644 spec/lib/gitlab/gfm/ast/lexer_spec.rb create mode 100644 spec/lib/gitlab/gfm/ast/parser_spec.rb create mode 100644 spec/lib/gitlab/gfm/ast_spec.rb diff --git a/lib/gitlab/gfm/ast.rb b/lib/gitlab/gfm/ast.rb new file mode 100644 index 000000000000..ee0af8ce7ab8 --- /dev/null +++ b/lib/gitlab/gfm/ast.rb @@ -0,0 +1,16 @@ +module Gitlab + module Gfm + ## + # GitLab Flavoured Markdown + # - Abstract Syntax Tree + # - Facade + # + module Ast + extend self + + def parse(text) + Parser.new(text).tree + end + end + end +end diff --git a/lib/gitlab/gfm/ast/lexer.rb b/lib/gitlab/gfm/ast/lexer.rb new file mode 100644 index 000000000000..7897701303d4 --- /dev/null +++ b/lib/gitlab/gfm/ast/lexer.rb @@ -0,0 +1,82 @@ +module Gitlab + module Gfm + module Ast + class Lexer + ## + # GFM AST Lexer + # + def initialize(text, tokens, parent = nil) + @text = text 
+ @tokens = tokens + @parent = parent + @nodes = [] + end + + ## + # Returns all nodes that has been found in text. + # + # We expect that all text is covered by lexemes. + # + def process! + @tokens.each do |token| + ranges_available.each do |range| + process_range(token, range) + end + end + + validate! + @nodes.each(&:process!) + @nodes.sort! + end + + private + + ## + # Processes a given range. + # + # If pattern is found in a range, but this range is already covered + # by an existing node, we ommit this one (flat search). + # + def process_range(token, range) + (@text[range]).scan(token.pattern).each do + match = Regexp.last_match + next if ranges_taken.any? { |taken| taken.include?(match.begin(0)) } + + @nodes << lexeme(token, match) + end + end + + def lexeme(token, match) + text, range = match[0], (match.begin(0)..(match.end(0))) + token.new(text, range, match, @parent) + end + + def validate! + end + + def ranges_taken + @nodes.map(&:range) + end + + ## + # TODO, ugly method we have to use until we have Range#- operator + # + def ranges_available + indexes_taken = @nodes.each_with_object([]) do |node, taken| + taken.concat(node.range.to_a) + end + + text_indexes = (0..@text.length).to_a + indexes_available = (text_indexes - indexes_taken).sort.uniq + indexes_available.inject([]) do |ranges, n| + if ranges.empty? || ranges.last.last != n - 1 + ranges + [n..n] + else + ranges[0..-2] + [ranges.last.first..n] + end + end + end + end + end + end +end diff --git a/lib/gitlab/gfm/ast/parser.rb b/lib/gitlab/gfm/ast/parser.rb new file mode 100644 index 000000000000..0087f21e85a5 --- /dev/null +++ b/lib/gitlab/gfm/ast/parser.rb @@ -0,0 +1,21 @@ +module Gitlab + module Gfm + module Ast + class Parser + def initialize(text) + @text = text + end + + def tree + content_nodes.first + end + + private + + def content_nodes + Lexer.new(@text, [Syntax::Content]).process! 
+ end + end + end + end +end diff --git a/lib/gitlab/gfm/ast/syntax/content.rb b/lib/gitlab/gfm/ast/syntax/content.rb new file mode 100644 index 000000000000..1f3d0105d216 --- /dev/null +++ b/lib/gitlab/gfm/ast/syntax/content.rb @@ -0,0 +1,20 @@ +module Gitlab + module Gfm + module Ast + module Syntax + ## + # Main GFM content + # + class Content < Node + def self.allowed + [Syntax::Markdown::CodeBlock, Syntax::Text] + end + + def self.pattern + /(?<value>.+)/m + end + end + end + end + end +end diff --git a/lib/gitlab/gfm/ast/syntax/markdown/code_block.rb b/lib/gitlab/gfm/ast/syntax/markdown/code_block.rb new file mode 100644 index 000000000000..4665bd59d408 --- /dev/null +++ b/lib/gitlab/gfm/ast/syntax/markdown/code_block.rb @@ -0,0 +1,27 @@ +module Gitlab + module Gfm + module Ast + module Syntax + module Markdown + class CodeBlock < Node + def to_s + @match[:start_token] + @value + @match[:end_token] + end + + def lang + @match[:lang] + end + + def self.allowed + [] + end + + def self.pattern + /(?<start_token>(```(?<lang>\w+)\n))(?<value>.+?)(?<end_token>\n```)/m + end + end + end + end + end + end +end diff --git a/lib/gitlab/gfm/ast/syntax/node.rb b/lib/gitlab/gfm/ast/syntax/node.rb new file mode 100644 index 000000000000..3ba9a02d53b8 --- /dev/null +++ b/lib/gitlab/gfm/ast/syntax/node.rb @@ -0,0 +1,88 @@ +module Gitlab + module Gfm + module Ast + module Syntax + class Node + attr_reader :text, :range, :parent, :value, :nodes + + def initialize(text, range, match, parent) + @text = text + @range = range + @match = match + @parent = parent + + @value = match[:value] + @nodes = [] + end + + ## + # Process children nodes + # + def process! + @nodes = lexer.new(@text, self.class.allowed, self).process! + end + + def index + @range.begin + end + + ## + # Method that is used to create a string representation of this node + # + def to_s + @text + end + + ## + # Is this node a leaf node? + # + def leaf? + @nodes.empty? 
+ end + + ## + # Lexer for this node + # + def lexer + Ast::Lexer + end + + def <=>(other) + return unless other.kind_of?(Node) + + case + when index < other.index then -1 + when index == other.index then 0 + when index > other.index then 1 + end + end + + ## + # Better inspect + # + def inspect + "#{self.class.name} #{@range}: #{@nodes.inspect}" + end + + ## + # Nodes allowed inside this one. + # + # This is pipeline of lexemes, order is relevant. + # + def self.allowed + raise NotImplementedError + end + + ## + # Regexp pattern for this node + # + # Each pattern must contain at least `value` capture group. + # + def self.pattern + raise NotImplementedError + end + end + end + end + end +end diff --git a/lib/gitlab/gfm/ast/syntax/text.rb b/lib/gitlab/gfm/ast/syntax/text.rb new file mode 100644 index 000000000000..d3836f8c05e5 --- /dev/null +++ b/lib/gitlab/gfm/ast/syntax/text.rb @@ -0,0 +1,20 @@ +module Gitlab + module Gfm + module Ast + module Syntax + ## + # Text description + # + class Text < Node + def self.allowed + [] + end + + def self.pattern + /(?<value>.+)/m + end + end + end + end + end +end diff --git a/spec/lib/gitlab/gfm/ast/lexer_spec.rb b/spec/lib/gitlab/gfm/ast/lexer_spec.rb new file mode 100644 index 000000000000..7a02f6a64a72 --- /dev/null +++ b/spec/lib/gitlab/gfm/ast/lexer_spec.rb @@ -0,0 +1,5 @@ +require 'spec_helper' + +describe Gitlab::Gfm::Ast::Lexer do + let(:parser) { described_class.new(text) } +end diff --git a/spec/lib/gitlab/gfm/ast/parser_spec.rb b/spec/lib/gitlab/gfm/ast/parser_spec.rb new file mode 100644 index 000000000000..c4ff7ddf520c --- /dev/null +++ b/spec/lib/gitlab/gfm/ast/parser_spec.rb @@ -0,0 +1,30 @@ +require 'spec_helper' + +describe Gitlab::Gfm::Ast::Parser do + let(:parser) { described_class.new(text) } + + describe '#tree' do + let(:tree) { parser.tree } + + context 'plain text' do + let(:text) { 'some plain text' } + + it 'returns valid root node' do + expect(tree).to be_a(Gitlab::Gfm::Ast::Syntax::Content) + end + end + 
+ context 'plain text and ruby block' do + let(:text) { "some text\n\n\n```ruby\nblock\n```" } + + it 'contains two lexemes' do + expect(tree.nodes.count).to eq 2 + end + + it 'contains valid lexemes' do + expect(tree.nodes.first).to be_a Gitlab::Gfm::Ast::Syntax::Text + expect(tree.nodes.second).to be_a Gitlab::Gfm::Ast::Syntax::Markdown::CodeBlock + end + end + end +end diff --git a/spec/lib/gitlab/gfm/ast_spec.rb b/spec/lib/gitlab/gfm/ast_spec.rb new file mode 100644 index 000000000000..f9f988f5dd9e --- /dev/null +++ b/spec/lib/gitlab/gfm/ast_spec.rb @@ -0,0 +1,10 @@ +require 'spec_helper' + +describe Gitlab::Gfm::Ast do + describe '#parse' do + subject { described_class.parse(text) } + let(:text) { 'some text' } + + it { is_expected.to be_a Gitlab::Gfm::Ast::Syntax::Content } + end +end -- GitLab From 6640dbd90f157090bef488ba4c97892c6dc391d9 Mon Sep 17 00:00:00 2001 From: Grzegorz Bizon Date: Wed, 9 Mar 2016 14:21:46 +0100 Subject: [PATCH 2/4] Add method that recreates a string from GFM AST tree --- lib/gitlab/gfm/ast/lexer.rb | 17 +++++------------ lib/gitlab/gfm/ast/parser.rb | 12 +++++++----- lib/gitlab/gfm/ast/syntax/content.rb | 4 ++++ lib/gitlab/gfm/ast/syntax/node.rb | 2 +- lib/gitlab/gfm/ast/syntax/text.rb | 4 ++++ spec/lib/gitlab/gfm/ast/parser_spec.rb | 14 ++++++++++++-- 6 files changed, 33 insertions(+), 20 deletions(-) diff --git a/lib/gitlab/gfm/ast/lexer.rb b/lib/gitlab/gfm/ast/lexer.rb index 7897701303d4..adec09b62fd3 100644 --- a/lib/gitlab/gfm/ast/lexer.rb +++ b/lib/gitlab/gfm/ast/lexer.rb @@ -24,7 +24,7 @@ def process! end end - validate! + # TODO, validate! @nodes.each(&:process!) @nodes.sort! end @@ -39,21 +39,14 @@ def process! # def process_range(token, range) (@text[range]).scan(token.pattern).each do - match = Regexp.last_match - next if ranges_taken.any? 
{ |taken| taken.include?(match.begin(0)) } + match, offset = Regexp.last_match, range.begin + range = (match.begin(0) + offset)...(match.end(0) + offset) - @nodes << lexeme(token, match) + next if ranges_taken.any? { |taken| taken.include?(range.begin) } + @nodes << token.new(match[0], range, match, @parent) end end - def lexeme(token, match) - text, range = match[0], (match.begin(0)..(match.end(0))) - token.new(text, range, match, @parent) - end - - def validate! - end - def ranges_taken @nodes.map(&:range) end diff --git a/lib/gitlab/gfm/ast/parser.rb b/lib/gitlab/gfm/ast/parser.rb index 0087f21e85a5..217856188ee7 100644 --- a/lib/gitlab/gfm/ast/parser.rb +++ b/lib/gitlab/gfm/ast/parser.rb @@ -2,18 +2,20 @@ module Gitlab module Gfm module Ast class Parser + attr_reader :tree + def initialize(text) @text = text + @lexer = Lexer.new(@text, [Syntax::Content]) + @nodes = @lexer.process! end def tree - content_nodes.first + @nodes.first end - private - - def content_nodes - Lexer.new(@text, [Syntax::Content]).process! 
+ def recreate + tree.to_s end end end diff --git a/lib/gitlab/gfm/ast/syntax/content.rb b/lib/gitlab/gfm/ast/syntax/content.rb index 1f3d0105d216..7e3c6b7a8a4a 100644 --- a/lib/gitlab/gfm/ast/syntax/content.rb +++ b/lib/gitlab/gfm/ast/syntax/content.rb @@ -13,6 +13,10 @@ def self.allowed def self.pattern /(?<value>.+)/m end + + def to_s + nodes.map(&:to_s).join + end end end end diff --git a/lib/gitlab/gfm/ast/syntax/node.rb b/lib/gitlab/gfm/ast/syntax/node.rb index 3ba9a02d53b8..f52e051aac63 100644 --- a/lib/gitlab/gfm/ast/syntax/node.rb +++ b/lib/gitlab/gfm/ast/syntax/node.rb @@ -30,7 +30,7 @@ def index # Method that is used to create a string representation of this node # def to_s - @text + raise NotImplementedError end ## diff --git a/lib/gitlab/gfm/ast/syntax/text.rb b/lib/gitlab/gfm/ast/syntax/text.rb index d3836f8c05e5..cfe634f43a44 100644 --- a/lib/gitlab/gfm/ast/syntax/text.rb +++ b/lib/gitlab/gfm/ast/syntax/text.rb @@ -6,6 +6,10 @@ module Syntax # Text description # class Text < Node + def to_s + @text + end + def self.allowed [] end diff --git a/spec/lib/gitlab/gfm/ast/parser_spec.rb b/spec/lib/gitlab/gfm/ast/parser_spec.rb index c4ff7ddf520c..fc9bf231e96c 100644 --- a/spec/lib/gitlab/gfm/ast/parser_spec.rb +++ b/spec/lib/gitlab/gfm/ast/parser_spec.rb @@ -15,16 +15,26 @@ end context 'plain text and ruby block' do - let(:text) { "some text\n\n\n```ruby\nblock\n```" } + let(:text) { "some text\n\n\n```ruby\nblock\n``` another text" } it 'contains two lexemes' do - expect(tree.nodes.count).to eq 2 + expect(tree.nodes.count).to eq 3 end it 'contains valid lexemes' do expect(tree.nodes.first).to be_a Gitlab::Gfm::Ast::Syntax::Text expect(tree.nodes.second).to be_a Gitlab::Gfm::Ast::Syntax::Markdown::CodeBlock + expect(tree.nodes.third).to be_a Gitlab::Gfm::Ast::Syntax::Text end end end + + describe '#recreate' do + let(:output) { parser.recreate } + let(:text) { "some text\n\n\n```ruby\nblock\n``` another text" } + + it 'matches original text' do + 
expect(output).to eq text + end + end end -- GitLab From fc290a7dcde969d956488612512d2fa4ab9d57c0 Mon Sep 17 00:00:00 2001 From: Grzegorz Bizon Date: Thu, 10 Mar 2016 10:40:38 +0100 Subject: [PATCH 3/4] Validate GFM AST lexer process, add some specs for it --- lib/gitlab/gfm/ast/lexer.rb | 31 ++++++++++++++----- spec/lib/gitlab/gfm/ast/lexer_spec.rb | 44 ++++++++++++++++++++++++++- 2 files changed, 66 insertions(+), 9 deletions(-) diff --git a/lib/gitlab/gfm/ast/lexer.rb b/lib/gitlab/gfm/ast/lexer.rb index adec09b62fd3..76929655ac7c 100644 --- a/lib/gitlab/gfm/ast/lexer.rb +++ b/lib/gitlab/gfm/ast/lexer.rb @@ -2,6 +2,8 @@ module Gitlab module Gfm module Ast class Lexer + class LexerError < StandardError; end + ## # GFM AST Lexer # @@ -18,31 +20,43 @@ def initialize(text, tokens, parent = nil) # We expect that all text is covered by lexemes. # def process! + process_nodes! + @nodes.each(&:process!) + @nodes.sort! + end + + private + + ## + # Processes lexeme nodes for each token in this lexer. + # + def process_nodes! + return if @tokens.empty? + @tokens.each do |token| ranges_available.each do |range| - process_range(token, range) + process_range!(range, token) end end - # TODO, validate! - @nodes.each(&:process!) - @nodes.sort! + unless ranges_available.empty? + raise LexerError, 'Unprocessed nodes detected!' + end end - private - ## # Processes a given range. # # If pattern is found in a range, but this range is already covered # by an existing node, we ommit this one (flat search). # - def process_range(token, range) + def process_range!(range, token) (@text[range]).scan(token.pattern).each do match, offset = Regexp.last_match, range.begin range = (match.begin(0) + offset)...(match.end(0) + offset) next if ranges_taken.any? 
{ |taken| taken.include?(range.begin) } + @nodes << token.new(match[0], range, match, @parent) end end @@ -59,8 +73,9 @@ def ranges_available taken.concat(node.range.to_a) end - text_indexes = (0..@text.length).to_a + text_indexes = (0..(@text.length - 1)).to_a indexes_available = (text_indexes - indexes_taken).sort.uniq + indexes_available.inject([]) do |ranges, n| if ranges.empty? || ranges.last.last != n - 1 ranges + [n..n] diff --git a/spec/lib/gitlab/gfm/ast/lexer_spec.rb b/spec/lib/gitlab/gfm/ast/lexer_spec.rb index 7a02f6a64a72..4394195695db 100644 --- a/spec/lib/gitlab/gfm/ast/lexer_spec.rb +++ b/spec/lib/gitlab/gfm/ast/lexer_spec.rb @@ -1,5 +1,47 @@ require 'spec_helper' describe Gitlab::Gfm::Ast::Lexer do - let(:parser) { described_class.new(text) } + let(:lexer) { described_class.new(text, tokens) } + let(:nodes) { lexer.process! } + + context 'order of tokens' do + let(:tokens) do + [Gitlab::Gfm::Ast::Syntax::Text, + Gitlab::Gfm::Ast::Syntax::Markdown::CodeBlock] + end + + let(:text) { "text and ```ruby\nblock\n```" } + + it 'greedily matches tokens in order those are defined' do + expect(nodes.count).to eq 1 + expect(nodes.first).to be_a Gitlab::Gfm::Ast::Syntax::Text + end + end + + context 'uncovered ranges' do + let(:tokens) do + [Gitlab::Gfm::Ast::Syntax::Markdown::CodeBlock] + end + + let(:text) { "text and ```ruby\nblock\n```" } + + it 'raises error when uncovered ranges remain' do + expect { nodes }.to raise_error(Gitlab::Gfm::Ast::Lexer::LexerError, + /Unprocessed nodes detected/) + end + end + + context 'intersecting tokens' do + let(:tokens) do + [Gitlab::Gfm::Ast::Syntax::Markdown::CodeBlock, + Gitlab::Gfm::Ast::Syntax::Text] + end + + let(:text) { "```ruby\nsome text\n```" } + + it 'does not match intersecting tokens' do + expect(nodes.count).to eq 1 + expect(nodes.first.nodes.count).to eq 0 + end + end end -- GitLab From e04913840c3babfdad5b4ead21a07f6a1695144e Mon Sep 17 00:00:00 2001 From: Grzegorz Bizon Date: Thu, 10 Mar 2016 13:07:31 
+0100 Subject: [PATCH 4/4] Add some specs for GFM AST, minor refactorings --- lib/gitlab/gfm/ast/lexer.rb | 10 ++++ lib/gitlab/gfm/ast/parser.rb | 9 +--- lib/gitlab/gfm/ast/syntax/content.rb | 6 ++- .../gfm/ast/syntax/markdown/code_block.rb | 14 +++-- lib/gitlab/gfm/ast/syntax/node.rb | 52 +++++++++++-------- lib/gitlab/gfm/ast/syntax/text.rb | 10 ++-- .../lib/gitlab/gfm/ast/syntax/content_spec.rb | 29 +++++++++++ .../ast/syntax/markdown/code_block_spec.rb | 35 +++++++++++++ spec/lib/gitlab/gfm/ast/syntax/text_spec.rb | 31 +++++++++++ 9 files changed, 158 insertions(+), 38 deletions(-) create mode 100644 spec/lib/gitlab/gfm/ast/syntax/content_spec.rb create mode 100644 spec/lib/gitlab/gfm/ast/syntax/markdown/code_block_spec.rb create mode 100644 spec/lib/gitlab/gfm/ast/syntax/text_spec.rb diff --git a/lib/gitlab/gfm/ast/lexer.rb b/lib/gitlab/gfm/ast/lexer.rb index 76929655ac7c..df3633fdb279 100644 --- a/lib/gitlab/gfm/ast/lexer.rb +++ b/lib/gitlab/gfm/ast/lexer.rb @@ -84,6 +84,16 @@ def ranges_available end end end + + ## + # Processes single token, and returns first lexeme that has been + # created. + # + def self.single(text, token) + lexer = new(text, [token]) + nodes = lexer.process! + nodes.first + end end end end diff --git a/lib/gitlab/gfm/ast/parser.rb b/lib/gitlab/gfm/ast/parser.rb index 217856188ee7..db1bff320b00 100644 --- a/lib/gitlab/gfm/ast/parser.rb +++ b/lib/gitlab/gfm/ast/parser.rb @@ -2,16 +2,11 @@ module Gitlab module Gfm module Ast class Parser - attr_reader :tree + attr_reader :tree, :text def initialize(text) @text = text - @lexer = Lexer.new(@text, [Syntax::Content]) - @nodes = @lexer.process! 
- end - - def tree - @nodes.first + @tree = Lexer.single(text, Syntax::Content) end def recreate diff --git a/lib/gitlab/gfm/ast/syntax/content.rb b/lib/gitlab/gfm/ast/syntax/content.rb index 7e3c6b7a8a4a..2f14e44c64bc 100644 --- a/lib/gitlab/gfm/ast/syntax/content.rb +++ b/lib/gitlab/gfm/ast/syntax/content.rb @@ -6,7 +6,7 @@ module Syntax # Main GFM content # class Content < Node - def self.allowed + def allowed [Syntax::Markdown::CodeBlock, Syntax::Text] end @@ -14,6 +14,10 @@ def self.pattern /(?<value>.+)/m end + def value + @text + end + def to_s nodes.map(&:to_s).join end diff --git a/lib/gitlab/gfm/ast/syntax/markdown/code_block.rb b/lib/gitlab/gfm/ast/syntax/markdown/code_block.rb index 4665bd59d408..401186a5a0c7 100644 --- a/lib/gitlab/gfm/ast/syntax/markdown/code_block.rb +++ b/lib/gitlab/gfm/ast/syntax/markdown/code_block.rb @@ -4,16 +4,20 @@ module Ast module Syntax module Markdown class CodeBlock < Node + def allowed + [] + end + def to_s - @match[:start_token] + @value + @match[:end_token] + @text end - def lang - @match[:lang] + def value + @text end - def self.allowed - [] + def lang + @match[:lang] end def self.pattern diff --git a/lib/gitlab/gfm/ast/syntax/node.rb b/lib/gitlab/gfm/ast/syntax/node.rb index f52e051aac63..0003704cfa47 100644 --- a/lib/gitlab/gfm/ast/syntax/node.rb +++ b/lib/gitlab/gfm/ast/syntax/node.rb @@ -3,7 +3,7 @@ module Gfm module Ast module Syntax class Node - attr_reader :text, :range, :parent, :value, :nodes + attr_reader :text, :range, :parent, :nodes def initialize(text, range, match, parent) @text = text @@ -16,14 +16,12 @@ def initialize(text, range, match, parent) end ## - # Process children nodes + # Nodes allowed inside this one. # - def process! - @nodes = lexer.new(@text, self.class.allowed, self).process! - end - - def index - @range.begin + # This is pipeline of lexemes, order is relevant. + # + def allowed + raise NotImplementedError end ## @@ -34,14 +32,35 @@ def to_s end ## - # Is this node a leaf node? 
+ # Returns the value of this node, without node-specific tokens. + # + def value + raise NotImplementedError + end + + ## + # Process children nodes + # + def process! + @nodes = lexer.new(value, allowed, self).process! + end + + ## + # Position of this node in parent + # + def index + @range.begin + end + + ## + # Returns true if node is a leaf in the tree. # def leaf? @nodes.empty? end ## - # Lexer for this node + # Each node can have its own lexer. # def lexer Ast::Lexer @@ -65,18 +84,7 @@ def inspect end ## - # Nodes allowed inside this one. - # - # This is pipeline of lexemes, order is relevant. - # - def self.allowed - raise NotImplementedError - end - - ## - # Regexp pattern for this node - # - # Each pattern must contain at least `value` capture group. + # Regexp pattern for this token. # def self.pattern raise NotImplementedError diff --git a/lib/gitlab/gfm/ast/syntax/text.rb b/lib/gitlab/gfm/ast/syntax/text.rb index cfe634f43a44..2df68daf23ce 100644 --- a/lib/gitlab/gfm/ast/syntax/text.rb +++ b/lib/gitlab/gfm/ast/syntax/text.rb @@ -6,12 +6,16 @@ module Syntax # Text description # class Text < Node - def to_s + def allowed + [] + end + + def value + @text end - def self.allowed - [] + def to_s + @text end def self.pattern diff --git a/spec/lib/gitlab/gfm/ast/syntax/content_spec.rb b/spec/lib/gitlab/gfm/ast/syntax/content_spec.rb new file mode 100644 index 000000000000..14b34fc77867 --- /dev/null +++ b/spec/lib/gitlab/gfm/ast/syntax/content_spec.rb @@ -0,0 +1,29 @@ +require 'spec_helper' + +describe Gitlab::Gfm::Ast::Syntax::Content do + describe 'token' do + let(:text) { "some multi\n\nline text" } + + it 'matches entire text' do + expect(text).to match(described_class.pattern) + end + end + + describe 'lexeme' do + let(:text) { "some text with ```ruby\nblock\n```" } + let(:lexeme) { Gitlab::Gfm::Ast::Lexer.single(text, described_class) } + + describe '#nodes' do + let(:nodes) { lexeme.nodes } + + it 'correctly instantiates children nodes' do + 
expect(nodes.count).to eq 2 + end + end + + describe '#to_s' do + subject { lexeme.to_s } + it { is_expected.to eq text } + end + end +end diff --git a/spec/lib/gitlab/gfm/ast/syntax/markdown/code_block_spec.rb b/spec/lib/gitlab/gfm/ast/syntax/markdown/code_block_spec.rb new file mode 100644 index 000000000000..c8a5c05310fd --- /dev/null +++ b/spec/lib/gitlab/gfm/ast/syntax/markdown/code_block_spec.rb @@ -0,0 +1,35 @@ +require 'spec_helper' + +describe Gitlab::Gfm::Ast::Syntax::Markdown::CodeBlock do + let(:text) { "```ruby\ncode block\n```" } + + describe 'token' do + it 'matches entire text' do + expect(text).to match described_class.pattern + end + end + + describe 'lexeme' do + let(:lexeme) { Gitlab::Gfm::Ast::Lexer.single(text, described_class) } + + describe '#nodes' do + subject { lexeme.nodes } + it { is_expected.to be_empty } + end + + describe '#leaf?' do + subject { lexeme.leaf? } + it { is_expected.to be true } + end + + describe '#to_s' do + subject { lexeme.to_s } + it { is_expected.to eq text } + end + + describe '#lang' do + subject { lexeme.lang } + it { is_expected.to eq 'ruby' } + end + end +end diff --git a/spec/lib/gitlab/gfm/ast/syntax/text_spec.rb b/spec/lib/gitlab/gfm/ast/syntax/text_spec.rb new file mode 100644 index 000000000000..0e532189e8cd --- /dev/null +++ b/spec/lib/gitlab/gfm/ast/syntax/text_spec.rb @@ -0,0 +1,31 @@ +require 'spec_helper' + +describe Gitlab::Gfm::Ast::Syntax::Text do + describe 'token' do + let(:text) { "some multi\n\nline text" } + + it 'matches entire text' do + expect(text).to match described_class.pattern + end + end + + describe 'lexeme' do + let(:text) { "some text with ```ruby\nblock\n```" } + let(:lexeme) { Gitlab::Gfm::Ast::Lexer.single(text, described_class) } + + describe '#nodes' do + subject { lexeme.nodes } + it { is_expected.to be_empty } + end + + describe '#leaf?' do + subject { lexeme.leaf? 
} + it { is_expected.to be true } + end + + describe '#to_s' do + subject { lexeme.to_s } + it { is_expected.to eq text } + end + end +end -- GitLab