Merged my new parser implementation that can handle irregular cases and is generally more flexible.

This commit is contained in:
Holger Rapp 2011-07-24 19:32:48 +02:00
commit b774ce8f11
3 changed files with 364 additions and 301 deletions

269
plugin/UltiSnips/Lexer.py Normal file
View File

@ -0,0 +1,269 @@
#!/usr/bin/env python
# encoding: utf-8
"""
Not really a Lexer in the classical sense, but code to hack Snippet Definitions
into Logical Units called Tokens.
"""
import string
import re
from Geometry import Position
__all__ = [
"tokenize", "EscapeCharToken", "TransformationToken", "TabStopToken",
"MirrorToken", "PythonCodeToken", "VimLCodeToken", "ShellCodeToken"
]
# Helper Classes {{{
class _TextIterator(object):
def __init__(self, text):
self._text = text
self._line = 0
self._col = 0
self._idx = 0
def __iter__(self):
return self
def next(self):
if self._idx >= len(self._text):
raise StopIteration
rv = self._text[self._idx]
if self._text[self._idx] in ('\n', '\r\n'):
self._line += 1
self._col = 0
else:
self._col += 1
self._idx += 1
return rv
def peek(self, count = 1):
try:
return self._text[self._idx:self._idx + count]
except IndexError:
return None
@property
def pos(self):
return Position(self._line, self._col)
# End: Helper Classes }}}
# Helper functions {{{
def _parse_number(stream):
"""
Expects the stream to contain a number next, returns the number
without consuming any more bytes
"""
rv = ""
while stream.peek() and stream.peek() in string.digits:
rv += stream.next()
return int(rv)
def _parse_till_closing_brace(stream):
"""
Returns all chars till a non-escaped } is found. Other
non escaped { are taken into account and skipped over.
Will also consume the closing }, but not return it
"""
rv = ""
in_braces = 1
while True:
if EscapeCharToken.starts_here(stream, '{}'):
rv += stream.next() + stream.next()
else:
c = stream.next()
if c == '{': in_braces += 1
elif c == '}': in_braces -= 1
if in_braces == 0: break
rv += c
return rv
def _parse_till_unescaped_char(stream, char):
"""
Returns all chars till a non-escaped `char` is found.
Will also consume the closing `char`, but not return it
"""
rv = ""
while True:
if EscapeCharToken.starts_here(stream, char):
rv += stream.next() + stream.next()
else:
c = stream.next()
if c == char: break
rv += c
return rv
# End: Helper functions }}}
# Tokens {{{
class Token(object):
def __init__(self, gen, indent):
self.initial_text = ""
self.start = gen.pos
self._parse(gen, indent)
self.end = gen.pos
class TabStopToken(Token):
CHECK = re.compile(r'^\${\d+[:}]')
@classmethod
def starts_here(klass, stream):
return klass.CHECK.match(stream.peek(10)) != None
def _parse(self, stream, indent):
stream.next() # $
stream.next() # {
self.no = _parse_number(stream)
if stream.peek() is ":":
stream.next()
self.initial_text = _parse_till_closing_brace(stream)
def __repr__(self):
return "TabStopToken(%r,%r,%r,%r)" % (
self.start, self.end, self.no, self.initial_text
)
class TransformationToken(Token):
CHECK = re.compile(r'^\${\d+\/')
@classmethod
def starts_here(klass, stream):
return klass.CHECK.match(stream.peek(10)) != None
def _parse(self, stream, indent):
stream.next() # $
stream.next() # {
self.no = _parse_number(stream)
stream.next() # /
self.search = _parse_till_unescaped_char(stream, '/')
self.replace = _parse_till_unescaped_char(stream, '/')
self.options = _parse_till_closing_brace(stream)
def __repr__(self):
return "TransformationToken(%r,%r,%r,%r,%r)" % (
self.start, self.end, self.no, self.search, self.replace
)
class MirrorToken(Token):
CHECK = re.compile(r'^\$\d+')
@classmethod
def starts_here(klass, stream):
return klass.CHECK.match(stream.peek(10)) != None
def _parse(self, stream, indent):
stream.next() # $
self.no = _parse_number(stream)
def __repr__(self):
return "MirrorToken(%r,%r,%r)" % (
self.start, self.end, self.no
)
class EscapeCharToken(Token):
@classmethod
def starts_here(klass, stream, chars = '{}\$`'):
cs = stream.peek(2)
if len(cs) == 2 and cs[0] == '\\' and cs[1] in chars:
return True
def _parse(self, stream, indent):
stream.next() # \
self.initial_text = stream.next()
def __repr__(self):
return "EscapeCharToken(%r,%r,%r)" % (
self.start, self.end, self.initial_text
)
class ShellCodeToken(Token):
@classmethod
def starts_here(klass, stream):
return stream.peek(1) == '`'
def _parse(self, stream, indent):
stream.next() # `
self.code = _parse_till_unescaped_char(stream, '`')
def __repr__(self):
return "ShellCodeToken(%r,%r,%r)" % (
self.start, self.end, self.code
)
class PythonCodeToken(Token):
CHECK = re.compile(r'^`!p\s')
@classmethod
def starts_here(klass, stream):
return klass.CHECK.match(stream.peek(4)) is not None
def _parse(self, stream, indent):
for i in range(3):
stream.next() # `!p
if stream.peek() in '\t ':
stream.next()
code = _parse_till_unescaped_char(stream, '`')
# Strip the indent if any
if len(indent):
lines = code.splitlines()
self.code = lines[0] + '\n'
self.code += '\n'.join([l[len(indent):]
for l in lines[1:]])
else:
self.code = code
self.indent = indent
def __repr__(self):
return "PythonCodeToken(%r,%r,%r)" % (
self.start, self.end, self.code
)
class VimLCodeToken(Token):
CHECK = re.compile(r'^`!v\s')
@classmethod
def starts_here(klass, stream):
return klass.CHECK.match(stream.peek(4)) is not None
def _parse(self, stream, indent):
for i in range(4):
stream.next() # `!v
self.code = _parse_till_unescaped_char(stream, '`')
def __repr__(self):
return "VimLCodeToken(%r,%r,%r)" % (
self.start, self.end, self.code
)
# End: Tokens }}}
__ALLOWED_TOKENS = [
EscapeCharToken, TransformationToken, TabStopToken, MirrorToken,
PythonCodeToken, VimLCodeToken, ShellCodeToken
]
def tokenize(text, indent):
stream = _TextIterator(text)
while True:
done_something = False
for t in __ALLOWED_TOKENS:
if t.starts_here(stream):
yield t(stream, indent)
done_something = True
break
if not done_something:
stream.next()

View File

@ -10,6 +10,8 @@ import vim
from UltiSnips.Util import IndentUtil from UltiSnips.Util import IndentUtil
from UltiSnips.Buffer import TextBuffer from UltiSnips.Buffer import TextBuffer
from UltiSnips.Geometry import Span, Position from UltiSnips.Geometry import Span, Position
from UltiSnips.Lexer import tokenize, EscapeCharToken, TransformationToken, \
TabStopToken, MirrorToken, PythonCodeToken, VimLCodeToken, ShellCodeToken
__all__ = [ "Mirror", "Transformation", "SnippetInstance", "StartMarker" ] __all__ = [ "Mirror", "Transformation", "SnippetInstance", "StartMarker" ]
@ -109,315 +111,95 @@ class _CleverReplace(object):
return self._unescape(tv.decode("string-escape")) return self._unescape(tv.decode("string-escape"))
class _TOParser(object): class _TOParser(object):
# A simple tabstop with default value def __init__(self, parent_to, text, indent):
_TABSTOP = re.compile(r'''(?<![^\\]\\)\${(\d+)[:}]''')
# A mirror or a tabstop without default value.
_MIRROR_OR_TS = re.compile(r'(?<![^\\]\\)\$(\d+)')
# A mirror or a tabstop without default value.
_TRANSFORMATION = re.compile(r'(?<![^\\]\\)\${(\d+)/(.*?)/(.*?)/([a-zA-z]*)}')
# The beginning of a shell code fragment
_SHELLCODE = re.compile(r'(?<![^\\]\\)`')
# The beginning of a python code fragment
_PYTHONCODE = re.compile(r'(?<![^\\]\\)`!p')
# The beginning of a vimL code fragment
_VIMCODE = re.compile(r'(?<![^\\]\\)`!v')
# Escaped characters in substrings
_UNESCAPE = re.compile(r'\\[`$\\]')
def __init__(self, parent, val, indent):
self._v = val
self._p = parent
self._indent = indent self._indent = indent
self._parent_to = parent_to
self._childs = [] self._text = text
def __repr__(self):
return "TOParser(%s)" % self._p
def parse(self): def parse(self):
self._parse_tabs() seen_ts = {}
self._parse_pythoncode() all_tokens = []
self._parse_vimlcode()
self._parse_shellcode()
self._parse_transformations()
self._parse_mirrors_or_ts()
self._parse_escaped_chars() self._do_parse(all_tokens, seen_ts)
self._finish() self._resolve_ambiguity(all_tokens, seen_ts)
self._create_objects_with_links_to_tabs(all_tokens, seen_ts)
#################
# Escaped chars #
#################
def _parse_escaped_chars(self):
m = self._UNESCAPE.search(self._v)
while m:
self._handle_unescape(m)
m = self._UNESCAPE.search(self._v)
for c in self._childs:
c._parse_escaped_chars()
def _handle_unescape(self, m):
start_pos = m.start()
end_pos = start_pos + 2
char = self._v[start_pos+1]
start, end = self._get_start_end(self._v,start_pos,end_pos)
self._overwrite_area(start_pos,end_pos)
return EscapedChar(self._p, start, end, char)
##############
# Shell Code #
##############
def _parse_shellcode(self):
m = self._SHELLCODE.search(self._v)
while m:
self._handle_shellcode(m)
m = self._SHELLCODE.search(self._v)
for c in self._childs:
c._parse_shellcode()
def _handle_shellcode(self, m):
start_pos = m.start()
end_pos = self._find_closing_bt(start_pos+1)
content = self._v[start_pos+1:end_pos-1]
start, end = self._get_start_end(self._v,start_pos,end_pos)
self._overwrite_area(start_pos,end_pos)
return ShellCode(self._p, start, end, content)
###############
# Python Code #
###############
def _parse_pythoncode(self):
m = self._PYTHONCODE.search(self._v)
while m:
self._handle_pythoncode(m)
m = self._PYTHONCODE.search(self._v)
for c in self._childs:
c._parse_pythoncode()
def _handle_pythoncode(self, m):
start_pos = m.start()
end_pos = self._find_closing_bt(start_pos+1)
# Strip `!p `
content = self._v[start_pos+3:end_pos-1]
start, end = self._get_start_end(self._v,start_pos,end_pos)
self._overwrite_area(start_pos,end_pos)
# Strip the indent if any
if len(self._indent):
lines = content.splitlines()
new_content = lines[0] + '\n'
new_content += '\n'.join([l[len(self._indent):]
for l in lines[1:]])
else:
new_content = content
new_content = new_content.strip()
return PythonCode(self._p, start, end, new_content, self._indent)
#############
# VimL Code #
#############
def _parse_vimlcode(self):
m = self._VIMCODE.search(self._v)
while m:
self._handle_vimlcode(m)
m = self._VIMCODE.search(self._v)
for c in self._childs:
c._parse_vimlcode()
def _handle_vimlcode(self, m):
start_pos = m.start()
end_pos = self._find_closing_bt(start_pos+1)
# Strip `!v `
content = self._v[start_pos+3:end_pos-1]
start, end = self._get_start_end(self._v,start_pos,end_pos)
self._overwrite_area(start_pos,end_pos)
return VimLCode(self._p, start, end, content)
########
# TABS #
########
def _parse_tabs(self):
ts = []
m = self._TABSTOP.search(self._v)
while m:
ts.append(self._handle_tabstop(m))
m = self._TABSTOP.search(self._v)
for t, def_text in ts:
child_parser = _TOParser(t, def_text, self._indent)
child_parser._parse_tabs()
self._childs.append(child_parser)
def _handle_tabstop(self, m):
def _find_closingbracket(v,start_pos):
bracks_open = 1
for idx, c in enumerate(v[start_pos:]):
if c == '{':
if v[idx+start_pos-1] != '\\':
bracks_open += 1
elif c == '}':
if v[idx+start_pos-1] != '\\':
bracks_open -= 1
if not bracks_open:
return start_pos+idx+1
start_pos = m.start()
end_pos = _find_closingbracket(self._v, start_pos+2)
def_text = self._v[m.end():end_pos-1]
start, end = self._get_start_end(self._v,start_pos,end_pos)
no = int(m.group(1))
ts = TabStop(no, self._p, start, end, def_text)
self._p._add_tabstop(no,ts)
self._overwrite_area(start_pos, end_pos)
return ts, def_text
###################
# TRANSFORMATIONS #
###################
def _parse_transformations(self):
self._trans = []
for m in self._TRANSFORMATION.finditer(self._v):
self._trans.append(self._handle_transformation(m))
for t in self._childs:
t._parse_transformations()
def _handle_transformation(self, m):
no = int(m.group(1))
search = m.group(2)
replace = m.group(3)
options = m.group(4)
start_pos, end_pos = m.span()
start, end = self._get_start_end(self._v,start_pos,end_pos)
self._overwrite_area(*m.span())
return Transformation(self._p, no, start, end, search, replace, options)
##################### #####################
# MIRRORS OR TS: $1 # # Private Functions #
##################### #####################
def _parse_mirrors_or_ts(self): def _resolve_ambiguity(self, all_tokens, seen_ts):
for m in self._MIRROR_OR_TS.finditer(self._v): for parent, token in all_tokens:
self._handle_ts_or_mirror(m) if isinstance(token, MirrorToken):
if token.no not in seen_ts:
ts = TabStop(parent, token)
seen_ts[token.no] = ts
parent._add_tabstop(token.no,ts)
else:
Mirror(parent, seen_ts[token.no], token)
for t in self._childs: def _create_objects_with_links_to_tabs(self, all_tokens, seen_ts):
t._parse_mirrors_or_ts() for parent, token in all_tokens:
if isinstance(token, TransformationToken):
if token.no not in seen_ts:
raise RuntimeError("Tabstop %i is not known but is used by a Transformation" % t._ts)
Transformation(parent, seen_ts[token.no], token)
def _handle_ts_or_mirror(self, m): def _do_parse(self, all_tokens, seen_ts):
no = int(m.group(1)) tokens = list(tokenize(self._text, self._indent))
start_pos, end_pos = m.span() for token in tokens:
start, end = self._get_start_end(self._v,start_pos,end_pos) all_tokens.append((self._parent_to, token))
ts = self._p._get_tabstop(self._p, no) if isinstance(token, TabStopToken):
if ts is not None: ts = TabStop(self._parent_to, token)
rv = Mirror(self._p, ts, start, end) seen_ts[token.no] = ts
else: self._parent_to._add_tabstop(token.no,ts)
rv = TabStop(no, self._p, start, end)
self._p._add_tabstop(no,rv)
self._overwrite_area(*m.span()) k = _TOParser(ts, ts.current_text, self._indent)
k._do_parse(all_tokens, seen_ts)
return rv elif isinstance(token, EscapeCharToken):
EscapedChar(self._parent_to, token)
################### elif isinstance(token, ShellCodeToken):
# Resolve symbols # ShellCode(self._parent_to, token)
################### elif isinstance(token, PythonCodeToken):
def _finish(self): PythonCode(self._parent_to, token)
for c in self._childs: elif isinstance(token, VimLCodeToken):
c._finish() VimLCode(self._parent_to, token)
for t in self._trans:
ts = self._p._get_tabstop(self._p,t._ts)
if ts is None:
raise RuntimeError, "Tabstop %i is not known" % t._ts
t._ts = ts
####################
# Helper functions #
####################
def _find_closing_bt(self, start_pos):
for idx,c in enumerate(self._v[start_pos:]):
if c == '`' and self._v[idx+start_pos-1] != '\\':
return idx + start_pos + 1
def _get_start_end(self, val, start_pos, end_pos):
def _get_pos(s, pos):
line_idx = s[:pos].count('\n')
line_start = s[:pos].rfind('\n') + 1
start_in_line = pos - line_start
return Position(line_idx, start_in_line)
return _get_pos(val, start_pos), _get_pos(val, end_pos)
def _overwrite_area(self, s, e):
"""Overwrite the given span with spaces. But keep newlines in place"""
area = self._v[s:e]
area = '\n'.join( [" "*len(i) for i in area.splitlines()] )
self._v = self._v[:s] + area + self._v[e:]
########################################################################### ###########################################################################
# Public classes # # Public classes #
########################################################################### ###########################################################################
class TextObject(object): class TextObject(object):
""" """
This base class represents any object in the text This base class represents any object in the text
that has a span in any ways that has a span in any ways
""" """
def __init__(self, parent, start, end, initial_text): def __init__(self, parent, token, end = None, initial_text = ""):
self._start = start
self._end = end
self._parent = parent self._parent = parent
if end is not None: # Took 4 arguments
self._start = token
self._end = end
self._current_text = TextBuffer(initial_text)
else: # Initialize from token
self._start = token.start
self._end = token.end
self._current_text = TextBuffer(token.initial_text)
self._childs = [] self._childs = []
self._tabstops = {} self._tabstops = {}
if parent is not None: if parent is not None:
parent._add_child(self) parent._add_child(self)
self._current_text = TextBuffer(initial_text)
self._cts = 0 self._cts = 0
def __cmp__(self, other): def __cmp__(self, other):
return cmp(self._start, other._start) return cmp(self._start, other._start)
############## ##############
# PROPERTIES # # PROPERTIES #
############## ##############
@ -545,7 +327,6 @@ class TextObject(object):
return max(posible_sol) return max(posible_sol)
############################### ###############################
# Private/Protected functions # # Private/Protected functions #
############################### ###############################
@ -605,7 +386,6 @@ class EscapedChar(TextObject):
""" """
pass pass
class StartMarker(TextObject): class StartMarker(TextObject):
""" """
This class only remembers it's starting position. It is used to This class only remembers it's starting position. It is used to
@ -614,15 +394,15 @@ class StartMarker(TextObject):
""" """
def __init__(self, start): def __init__(self, start):
end = Position(start.line, start.col) end = Position(start.line, start.col)
TextObject.__init__(self, None, start, end, "") TextObject.__init__(self, None, start, end)
class Mirror(TextObject): class Mirror(TextObject):
""" """
A Mirror object mirrors a TabStop that is, text is repeated here A Mirror object mirrors a TabStop that is, text is repeated here
""" """
def __init__(self, parent, ts, start, end): def __init__(self, parent, ts, token):
TextObject.__init__(self, parent, start, end, "") TextObject.__init__(self, parent, token)
self._ts = ts self._ts = ts
@ -634,19 +414,19 @@ class Mirror(TextObject):
class Transformation(Mirror): class Transformation(Mirror):
def __init__(self, parent, ts, start, end, s, r, options): def __init__(self, parent, ts, token):
Mirror.__init__(self, parent, ts, start, end) Mirror.__init__(self, parent, ts, token)
flags = 0 flags = 0
self._match_this_many = 1 self._match_this_many = 1
if options: if token.options:
if "g" in options: if "g" in token.options:
self._match_this_many = 0 self._match_this_many = 0
if "i" in options: if "i" in token.options:
flags |= re.IGNORECASE flags |= re.IGNORECASE
self._find = re.compile(s, flags | re.DOTALL) self._find = re.compile(token.search, flags | re.DOTALL)
self._replace = _CleverReplace(r) self._replace = _CleverReplace(token.replace)
def _do_update(self): def _do_update(self):
t = self._ts.current_text t = self._ts.current_text
@ -657,9 +437,8 @@ class Transformation(Mirror):
return "Transformation(%s -> %s)" % (self._start, self._end) return "Transformation(%s -> %s)" % (self._start, self._end)
class ShellCode(TextObject): class ShellCode(TextObject):
def __init__(self, parent, start, end, code): def __init__(self, parent, token):
code = token.code.replace("\\`", "`")
code = code.replace("\\`", "`")
# Write the code to a temporary file # Write the code to a temporary file
handle, path = tempfile.mkstemp(text=True) handle, path = tempfile.mkstemp(text=True)
@ -678,16 +457,17 @@ class ShellCode(TextObject):
os.unlink(path) os.unlink(path)
TextObject.__init__(self, parent, start, end, output) token.initial_text = output
TextObject.__init__(self, parent, token)
def __repr__(self): def __repr__(self):
return "ShellCode(%s -> %s)" % (self._start, self._end) return "ShellCode(%s -> %s)" % (self._start, self._end)
class VimLCode(TextObject): class VimLCode(TextObject):
def __init__(self, parent, start, end, code): def __init__(self, parent, token):
self._code = code.replace("\\`", "`").strip() self._code = token.code.replace("\\`", "`").strip()
TextObject.__init__(self, parent, start, end, "") TextObject.__init__(self, parent, token)
def _do_update(self): def _do_update(self):
self.current_text = str(vim.eval(self._code)) self.current_text = str(vim.eval(self._code))
@ -844,9 +624,9 @@ class SnippetUtil(object):
class PythonCode(TextObject): class PythonCode(TextObject):
def __init__(self, parent, start, end, code, indent=""): def __init__(self, parent, token):
code = code.replace("\\`", "`") code = token.code.replace("\\`", "`")
# Find our containing snippet for snippet local data # Find our containing snippet for snippet local data
snippet = parent snippet = parent
@ -855,7 +635,7 @@ class PythonCode(TextObject):
snippet = snippet._parent snippet = snippet._parent
except AttributeError: except AttributeError:
snippet = None snippet = None
self._snip = SnippetUtil(indent) self._snip = SnippetUtil(token.indent)
self._locals = snippet.locals self._locals = snippet.locals
self._globals = {} self._globals = {}
@ -865,7 +645,7 @@ class PythonCode(TextObject):
# Add Some convenience to the code # Add Some convenience to the code
self._code = "import re, os, vim, string, random\n" + code self._code = "import re, os, vim, string, random\n" + code
TextObject.__init__(self, parent, start, end, "") TextObject.__init__(self, parent, token)
def _do_update(self): def _do_update(self):
@ -903,9 +683,13 @@ class TabStop(TextObject):
This is the most important TextObject. A TabStop is were the cursor This is the most important TextObject. A TabStop is were the cursor
comes to rest when the user taps through the Snippet. comes to rest when the user taps through the Snippet.
""" """
def __init__(self, no, parent, start, end, default_text = ""): def __init__(self, parent, token, start = None, end = None):
TextObject.__init__(self, parent, start, end, default_text) if start is not None:
self._no = no self._no = token
TextObject.__init__(self, parent, start, end)
else:
TextObject.__init__(self, parent, token)
self._no = token.no
def no(self): def no(self):
return self._no return self._no
@ -952,7 +736,7 @@ class SnippetInstance(TextObject):
col -= self.start.col col -= self.start.col
start = Position(delta.line, col) start = Position(delta.line, col)
end = Position(delta.line, col) end = Position(delta.line, col)
ts = TabStop(0, self, start, end, "") ts = TabStop(self, 0, start, end)
self._add_tabstop(0,ts) self._add_tabstop(0,ts)
self.update() self.update()
@ -1000,5 +784,3 @@ class SnippetInstance(TextObject):
return ts return ts
return self._tabstops[self._cts] return self._tabstops[self._cts]

12
test.py
View File

@ -443,6 +443,18 @@ class TabStop_EscapingCharsDollars(_VimTest):
snippets = ("test", r"snip \$0 $$0 end") snippets = ("test", r"snip \$0 $$0 end")
keys = "test" + EX + "hi" keys = "test" + EX + "hi"
wanted = "snip $0 $hi end" wanted = "snip $0 $hi end"
class TabStop_EscapingCharsDollars1(_VimTest):
snippets = ("test", r"a\${1:literal}")
keys = "test" + EX
wanted = "a${1:literal}"
class TabStop_EscapingCharsDollars_BeginningOfLine(_VimTest):
snippets = ("test", "\n\\${1:literal}")
keys = "test" + EX
wanted = "\n${1:literal}"
class TabStop_EscapingCharsDollars_BeginningOfDefinitionText(_VimTest):
snippets = ("test", "\\${1:literal}")
keys = "test" + EX
wanted = "${1:literal}"
class TabStop_EscapingChars_Backslash(_VimTest): class TabStop_EscapingChars_Backslash(_VimTest):
snippets = ("test", r"This \ is a backslash!") snippets = ("test", r"This \ is a backslash!")
keys = "test" + EX keys = "test" + EX