From 4fe0cd4da35e7ef08ac68fa55b19f832c899d7e4 Mon Sep 17 00:00:00 2001 From: Holger Rapp Date: Fri, 1 Apr 2011 20:12:15 +0200 Subject: [PATCH 01/12] Added a failing test that can not be corrected with our current parser --- test.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/test.py b/test.py index effd6bb..803d3a8 100755 --- a/test.py +++ b/test.py @@ -443,6 +443,18 @@ class TabStop_EscapingCharsDollars(_VimTest): snippets = ("test", r"snip \$0 $$0 end") keys = "test" + EX + "hi" wanted = "snip $0 $hi end" +class TabStop_EscapingCharsDollars1(_VimTest): + snippets = ("test", r"a\${1:literal}") + keys = "test" + EX + wanted = "a${1:literal}" +class TabStop_EscapingCharsDollars_BeginningOfLine(_VimTest): + snippets = ("test", "\n\\${1:literal}") + keys = "test" + EX + wanted = "\n${1:literal}" +class TabStop_EscapingCharsDollars_BeginningOfDefinitionText(_VimTest): + snippets = ("test", "\\${1:literal}") + keys = "test" + EX + wanted = "${1:literal}" class TabStop_EscapingChars_Backslash(_VimTest): snippets = ("test", r"This \ is a backslash!") keys = "test" + EX From cdb4558f791116673318fc184d15d5d29eeffa6b Mon Sep 17 00:00:00 2001 From: Holger Rapp Date: Fri, 1 Apr 2011 21:03:25 +0200 Subject: [PATCH 02/12] First tryouts.. Nothing working so far and all is horrible ugly --- plugin/UltiSnips/TextObjects.py | 72 +++++++++++++++++++++------------ 1 file changed, 47 insertions(+), 25 deletions(-) diff --git a/plugin/UltiSnips/TextObjects.py b/plugin/UltiSnips/TextObjects.py index d7d2f1f..39b9310 100644 --- a/plugin/UltiSnips/TextObjects.py +++ b/plugin/UltiSnips/TextObjects.py @@ -13,6 +13,10 @@ from UltiSnips.Geometry import Span, Position __all__ = [ "Mirror", "Transformation", "SnippetInstance", "StartMarker" ] +from itertools import takewhile + +from debug import debug + ########################################################################### # Helper class # ########################################################################### @@ -135,16 +139,35 @@ class _TOParser(object): return "TOParser(%s)" % self._p def parse(self): - self._parse_tabs() - self._parse_pythoncode() - self._parse_vimlcode() - self._parse_shellcode() - self._parse_transformations() - self._parse_mirrors_or_ts() + text = "" - self._parse_escaped_chars() + idx = 0 + while idx < len(self._v): + if self._v[idx] == '\\': + text += self._v[idx+1] + idx += 2 + elif self._v[idx:].startswith("${"): + didx, dtext = self._parse_tabstop(idx, self._v) + debug("%r, %r" %(didx,dtext)) + idx += didx + text += dtext + else: + text += self._v[idx] + idx += 1 - self._finish() + self._v = text + + # self._parse_tabs() + # self._parse_pythoncode() + # self._parse_vimlcode() + # + # self._parse_shellcode() + # self._parse_transformations() + # self._parse_mirrors_or_ts() + + # self._parse_escaped_chars() + + # self._finish() ################# # Escaped chars # @@ -253,24 +276,10 @@ class _TOParser(object): return VimLCode(self._p, start, end, content) - - ######## # TABS # ######## - def _parse_tabs(self): - ts = [] - m = self._TABSTOP.search(self._v) - while m: - ts.append(self._handle_tabstop(m)) - m = self._TABSTOP.search(self._v) - - for t, def_text in ts: - child_parser = _TOParser(t, def_text, self._indent) - child_parser._parse_tabs() - self._childs.append(child_parser) - - def _handle_tabstop(self, m): + def _parse_tabstop(self, start_pos, v): def _find_closingbracket(v,start_pos): bracks_open = 1 for idx, c in enumerate(v[start_pos:]): @@ -283,9 +292,9 @@ class _TOParser(object): if not bracks_open: 
return start_pos+idx+1 - start_pos = m.start() end_pos = _find_closingbracket(self._v, start_pos+2) + m = self._TABSTOP.match(v[start_pos:]) def_text = self._v[m.end():end_pos-1] start, end = self._get_start_end(self._v,start_pos,end_pos) @@ -297,7 +306,19 @@ class _TOParser(object): self._overwrite_area(start_pos, end_pos) - return ts, def_text + return end_pos-start_pos, def_text + ts = [] + # m = self._TABSTOP.search(self._v) + + # while m: + # ts.append(self._handle_tabstop(m)) + # m = self._TABSTOP.search(self._v) + + # for t, def_text in ts: + # child_parser = _TOParser(t, def_text, self._indent) + # child_parser._parse_tabs() + # self._childs.append(child_parser) + ################### # TRANSFORMATIONS # @@ -934,6 +955,7 @@ class SnippetInstance(TextObject): TextObject.__init__(self, parent, start, end, initial_text) + debug("initial_text: %r" % (initial_text)) _TOParser(self, initial_text, indent).parse() # Check if we have a zero Tab, if not, add one at the end From 4307612aa81fb4142247c24dce14e6e639d06bb1 Mon Sep 17 00:00:00 2001 From: Holger Rapp Date: Sat, 23 Jul 2011 23:40:01 +0200 Subject: [PATCH 03/12] New stateful parser. Still flawed design and not flexible enough, but a beginning --- plugin/UltiSnips/TextObjects.py | 253 ++++++++++++++++++++++++++++++-- 1 file changed, 237 insertions(+), 16 deletions(-) diff --git a/plugin/UltiSnips/TextObjects.py b/plugin/UltiSnips/TextObjects.py index 39b9310..df77cf5 100644 --- a/plugin/UltiSnips/TextObjects.py +++ b/plugin/UltiSnips/TextObjects.py @@ -129,6 +129,8 @@ class _TOParser(object): _UNESCAPE = re.compile(r'\\[`$\\]') def __init__(self, parent, val, indent): + + self._v = val self._p = parent self._indent = indent @@ -139,23 +141,28 @@ class _TOParser(object): return "TOParser(%s)" % self._p def parse(self): - text = "" + val = self._v + # self._v = "" + s = SnippetParser(self, val) + s.parse() - idx = 0 - while idx < len(self._v): - if self._v[idx] == '\\': - text += self._v[idx+1] - idx += 2 - elif self._v[idx:].startswith("${"): - didx, dtext = self._parse_tabstop(idx, self._v) - debug("%r, %r" %(didx,dtext)) - idx += didx - text += dtext - else: - text += self._v[idx] - idx += 1 + # text = "" - self._v = text + # idx = 0 + # while idx < len(self._v): + # if self._v[idx] == '\\': + # text += self._v[idx+1] + # idx += 2 + # elif self._v[idx:].startswith("${"): + # didx, dtext = self._parse_tabstop(idx, self._v) + # debug("%r, %r" %(didx,dtext)) + # idx += didx + # text += dtext + # else: + # text += self._v[idx] + # idx += 1 + + # self._v = text # self._parse_tabs() # self._parse_pythoncode() @@ -955,7 +962,6 @@ class SnippetInstance(TextObject): TextObject.__init__(self, parent, start, end, initial_text) - debug("initial_text: %r" % (initial_text)) _TOParser(self, initial_text, indent).parse() # Check if we have a zero Tab, if not, add one at the end @@ -1023,4 +1029,219 @@ class SnippetInstance(TextObject): return self._tabstops[self._cts] +## TODO: everything below here should be it's own module +from debug import debug +import string + +class TextIterator(object): + def __init__(self, text): + self._text = text + self._line = 0 + self._col = 0 + + self._idx = 0 + + def __iter__(self): + return self + + def next(self): + if self._idx >= len(self._text): + raise StopIteration + + rv = self._text[self._idx] + if self._idx > 0 and self._text[self._idx - 1] in ('\n', '\r\n'): + self._line += 1 + self._col = 0 + else: + self._col += 1 + self._idx += 1 + + return rv + + def peek(self, count = 1): + try: + return 
self._text[self._idx:self._idx + count] + except IndexError: + return None + + @property + def idx(self): + return self._idx + + @property + def pos(self): + return Position(self._line, self._col) + + @property + def exhausted(self): + return self._idx >= len(self._text) + +class Token(object): + pass + +class LiteralTextToken(Token): + def __init__(self, text): + self.text = text + +class TabStopToken(Token): + def __init__(self, number, start, end, default_text): + self.no = number + self.start = start + self.end = end + self.default_text = default_text + +class MirrorToken(Token): + def __init__(self, number, start, end): + self.no = number + self.start = start + self.end = end + +class ParsingMode(object): + pass + +class LiteralMode(ParsingMode): + def __init__(self): + self._text = "" + + def run(self, gen): + for c in gen: + if c is '\\': + self._text += gen.next() + else: + self._text += c + + if gen.peek(2) == '${': + return "tabstop" + if gen.peek(1) == '$' and gen.peek(2)[-1] in string.digits: + # TODO: this is not strong enough. + return "tabstop_or_mirror" + + def finish(self): + return LiteralTextToken(self._text) + + +class TabStopMode(ParsingMode): + def __init__(self): + self._handler = self._parse_number + self._number = "" + self._default_text = "" + + def _parse_number(self, gen): + debug("gen.pos: %s, gen.peek(: %s" % (gen.pos, gen.peek())) + while gen.peek() in string.digits: + self._number += gen.next() + if gen.peek() is ":": + gen.next() + + self._number = int(self._number) + self._handler = self._parse_default_text + + def _parse_default_text(self, gen): + for c in gen: + if c == '}': return False + self._default_text += c + return False + + def run(self, gen): + self._start = gen.pos + + gen.next() # $ + gen.next() # { + + while self._handler(gen) != False: + pass + + self._end = gen.pos + + def finish(self): + return TabStopToken( + self._number, self._start, self._end, self._default_text + ) + +class TabStopOrMirrorMode(ParsingMode): + def __init__(self): + self._number = "" + + def run(self, gen): + self._start = gen.pos + + gen.next() + + while gen.peek() in string.digits: + self._number += gen.next() + + self._end = gen.pos + + def finish(self): + return MirrorToken( + int(self._number), self._start, self._end + ) + + +class SnippetParser(object): + MODES = { + "literal": LiteralMode, + "tabstop": TabStopMode, + "tabstop_or_mirror": TabStopOrMirrorMode, + } + + def __init__(self, parent, text): + debug("text: %s" % (text)) + self.current_to = parent._p + self.stream = TextIterator(text) + self.mode = None + + self.tokens = [] + + def parse(self): + self.switch_mode("literal") + + seen_ts = set() + + debug("tokens: %s" % (self.tokens)) + for token in self.tokens: + if isinstance(token, LiteralTextToken): + # self.current_to._v += LiteralTextToken.text + pass + elif isinstance(token, TabStopToken): + # TODO: could also take the token directly + debug("token.start: %s, token.end: %s" % (token.start, token.end)) + ts = TabStop(token.no, self.current_to, + token.start, token.end, token.default_text) + seen_ts.add(token.no) + self.current_to._add_tabstop(token.no,ts) + + + for token in self.tokens: + if isinstance(token, MirrorToken): + # TODO: maybe we can get rid of _get_tabstop and _add_tabstop + if token.no not in seen_ts: + debug("token.start: %s, token.end: %s" % (token.start, token.end)) + ts = TabStop(token.no, self.current_to, + token.start, token.end) + debug("ALIVE1" % ()) + seen_ts.add(token.no) + debug("ALIVE2" % ()) + 
self.current_to._add_tabstop(token.no,ts) + debug("ALIVE3" % ()) + else: + raise RuntimeError("Never here!") + Mirror(self.current_to, self.current_to._get_tabstop(self.current_to, token.no), token.start, token.end) + + + + + + + def switch_mode(self, mode_name): + while not self.stream.exhausted: + self.mode = self.MODES[mode_name]() + mode_name = self.mode.run(self.stream) + self.tokens.append(self.mode.finish()) + mode_name = mode_name or "literal" + + + + + From b91e97fa5dba937f4b548779c09bfc147ec14982 Mon Sep 17 00:00:00 2001 From: Holger Rapp Date: Sun, 24 Jul 2011 09:31:25 +0200 Subject: [PATCH 04/12] Homing in on a design --- plugin/UltiSnips/TextObjects.py | 201 ++++++++++++-------------------- test.py | 4 +- 2 files changed, 79 insertions(+), 126 deletions(-) diff --git a/plugin/UltiSnips/TextObjects.py b/plugin/UltiSnips/TextObjects.py index df77cf5..3d408ed 100644 --- a/plugin/UltiSnips/TextObjects.py +++ b/plugin/UltiSnips/TextObjects.py @@ -1066,7 +1066,7 @@ class TextIterator(object): @property def idx(self): - return self._idx + return self._idx # TODO: does this need to be exposed? @property def pos(self): @@ -1077,133 +1077,104 @@ class TextIterator(object): return self._idx >= len(self._text) class Token(object): - pass + def __init__(self, gen): + self.start = gen.pos + self._parse(gen) + self.end = gen.pos -class LiteralTextToken(Token): - def __init__(self, text): - self.text = text + +def _parse_number(stream): + # TODO: document me + rv = "" + while stream.peek() in string.digits: + rv += stream.next() + + return int(rv) + +def _parse_till_closing_brace(stream): + # TODO: document me + rv = "" + for c in stream: + if c == '}': break # TODO: must count braces + rv += c + return rv class TabStopToken(Token): - def __init__(self, number, start, end, default_text): - self.no = number - self.start = start - self.end = end - self.default_text = default_text + @classmethod + def check(klass, stream): + # TODO: bad name for function + return stream.peek(2) == '${' + + def __init__(self, gen): + Token.__init__(self, gen) + + def _parse(self, stream): + stream.next() # $ + stream.next() # { + + self.no = _parse_number(stream) + + self.default_text = "" + if stream.peek() is ":": + stream.next() + self.default_text = _parse_till_closing_brace(stream) class MirrorToken(Token): - def __init__(self, number, start, end): - self.no = number - self.start = start - self.end = end + CHECK = re.compile(r'^\$\d+\s') + + @classmethod + def check(klass, stream): + # TODO: bad name for function + return klass.CHECK.match(stream.peek(10)) != None + + def __init__(self, stream): + self.no = "" + + Token.__init__(self, stream) # TODO: check for gen usage + + def _parse(self, stream): + stream.next() # $ + while stream.peek() in string.digits: + self.no += stream.next() + self.no = int(self.no) + +class EscapeCharToken(Token): + @classmethod + def check(klass, stream): + return stream.peek(1) == '\\' class ParsingMode(object): - pass + def tokens(self, stream): + while True: + done_something = False + for t in self.ALLOWED_TOKENS: + if t.check(stream): + yield t(stream) + done_something = True + break + if not done_something: + stream.next() class LiteralMode(ParsingMode): - def __init__(self): - self._text = "" - - def run(self, gen): - for c in gen: - if c is '\\': - self._text += gen.next() - else: - self._text += c - - if gen.peek(2) == '${': - return "tabstop" - if gen.peek(1) == '$' and gen.peek(2)[-1] in string.digits: - # TODO: this is not strong enough. 
- return "tabstop_or_mirror" - - def finish(self): - return LiteralTextToken(self._text) - - -class TabStopMode(ParsingMode): - def __init__(self): - self._handler = self._parse_number - self._number = "" - self._default_text = "" - - def _parse_number(self, gen): - debug("gen.pos: %s, gen.peek(: %s" % (gen.pos, gen.peek())) - while gen.peek() in string.digits: - self._number += gen.next() - if gen.peek() is ":": - gen.next() - - self._number = int(self._number) - self._handler = self._parse_default_text - - def _parse_default_text(self, gen): - for c in gen: - if c == '}': return False - self._default_text += c - return False - - def run(self, gen): - self._start = gen.pos - - gen.next() # $ - gen.next() # { - - while self._handler(gen) != False: - pass - - self._end = gen.pos - - def finish(self): - return TabStopToken( - self._number, self._start, self._end, self._default_text - ) - -class TabStopOrMirrorMode(ParsingMode): - def __init__(self): - self._number = "" - - def run(self, gen): - self._start = gen.pos - - gen.next() - - while gen.peek() in string.digits: - self._number += gen.next() - - self._end = gen.pos - - def finish(self): - return MirrorToken( - int(self._number), self._start, self._end - ) + ALLOWED_TOKENS = [ EscapeCharToken, TabStopToken, MirrorToken ] class SnippetParser(object): - MODES = { - "literal": LiteralMode, - "tabstop": TabStopMode, - "tabstop_or_mirror": TabStopOrMirrorMode, - } - def __init__(self, parent, text): debug("text: %s" % (text)) self.current_to = parent._p self.stream = TextIterator(text) self.mode = None - self.tokens = [] def parse(self): - self.switch_mode("literal") + tokens = list(LiteralMode().tokens(self.stream)) seen_ts = set() - debug("tokens: %s" % (self.tokens)) - for token in self.tokens: - if isinstance(token, LiteralTextToken): - # self.current_to._v += LiteralTextToken.text - pass - elif isinstance(token, TabStopToken): + debug("tokens: %s" % (tokens)) + for token in tokens: + if isinstance(token, TabStopToken): # TODO: could also take the token directly debug("token.start: %s, token.end: %s" % (token.start, token.end)) ts = TabStop(token.no, self.current_to, @@ -1212,7 +1183,7 @@ class SnippetParser(object): self.current_to._add_tabstop(token.no,ts) - for token in self.tokens: + for token in tokens: if isinstance(token, MirrorToken): # TODO: maybe we can get rid of _get_tabstop and _add_tabstop if token.no not in seen_ts: @@ -1227,21 +1198,3 @@ class SnippetParser(object): else: raise RuntimeError("Never here!") Mirror(self.current_to, self.current_to._get_tabstop(self.current_to, token.no), token.start, token.end) - - - - - - - def switch_mode(self, mode_name): - while not self.stream.exhausted: - self.mode = self.MODES[mode_name]() - mode_name = self.mode.run(self.stream) - self.tokens.append(self.mode.finish()) - mode_name = mode_name or "literal" - - - - - - diff --git a/test.py b/test.py index 803d3a8..3fe1ca0 100755 --- a/test.py +++ b/test.py @@ -186,7 +186,7 @@ class _VimTest(unittest.TestCase): '\n\n' + self.text_after if self.expected_error: wanted = wanted + "\n" + self.expected_error - for i in range(4): + for i in range(0): # TODO: make this a 4 again if self.output != wanted: # Redo this, but slower self.sleeptime += 0.02 @@ -2541,7 +2541,7 @@ if __name__ == '__main__': # Now, source our runtime send(":so plugin/UltiSnips.vim\n", options.session) - time.sleep(2) # Parsing and initializing UltiSnips takes a while. + time.sleep(.1) # Parsing and initializing UltiSnips takes a while. 
# TODO: amke this 2 seconds again # Inform all test case which screen session to use suite = unittest.TestSuite() From ef10362469a197ea3d53134ff480e20e3fb1600e Mon Sep 17 00:00:00 2001 From: Holger Rapp Date: Sun, 24 Jul 2011 11:42:29 +0200 Subject: [PATCH 05/12] Fixed escaped chars handling --- plugin/UltiSnips/TextObjects.py | 46 ++++++++++++++++++++++++++------- 1 file changed, 36 insertions(+), 10 deletions(-) diff --git a/plugin/UltiSnips/TextObjects.py b/plugin/UltiSnips/TextObjects.py index 3d408ed..16d82ea 100644 --- a/plugin/UltiSnips/TextObjects.py +++ b/plugin/UltiSnips/TextObjects.py @@ -1049,7 +1049,7 @@ class TextIterator(object): raise StopIteration rv = self._text[self._idx] - if self._idx > 0 and self._text[self._idx - 1] in ('\n', '\r\n'): + if self._text[self._idx] in ('\n', '\r\n'): self._line += 1 self._col = 0 else: @@ -1118,30 +1118,55 @@ class TabStopToken(Token): if stream.peek() is ":": stream.next() self.default_text = _parse_till_closing_brace(stream) + debug("self.start: %s, stream.pos: %s" % (self.start, stream.pos)) + + def __repr__(self): + return "TabStopToken(%r,%r,%r,%r)" % ( + self.start, self.end, self.no, self.default_text + ) class MirrorToken(Token): - CHECK = re.compile(r'^\$\d+\s') + CHECK = re.compile(r'^\$\d+') @classmethod def check(klass, stream): # TODO: bad name for function + debug("string.peek(2: %r" % (stream.peek(2))) + rv = stream.peek(1) == '$' and stream.peek(2)[-1] in string.digits + debug("rv: %s" % (rv)) + return rv + # TODO return klass.CHECK.match(stream.peek(10)) != None - def __init__(self, stream): - self.no = "" - - Token.__init__(self, stream) # TODO: check for gen usage - def _parse(self, stream): + self.no = "" stream.next() # $ - while stream.peek() in string.digits: + while not stream.exhausted and stream.peek() in string.digits: self.no += stream.next() self.no = int(self.no) + def __repr__(self): + return "MirrorToken(%r,%r,%r)" % ( + self.start, self.end, self.no + ) + class EscapeCharToken(Token): @classmethod def check(klass, stream): - return stream.peek(1) == '\\' + cs = stream.peek(2) + if len(cs) == 2 and cs[0] == '\\' and cs[1] in '{}\$`': + return True + + def _parse(self, stream): + stream.next() # \ + self.char = stream.next() + + + # TODO: get rid of those repr maybe + def __repr__(self): + return "EscapeCharToken(%r,%r,%r)" % ( + self.start, self.end, self.char + ) class ParsingMode(object): def tokens(self, stream): @@ -1181,6 +1206,8 @@ class SnippetParser(object): token.start, token.end, token.default_text) seen_ts.add(token.no) self.current_to._add_tabstop(token.no,ts) + elif isinstance(token, EscapeCharToken): + EscapedChar(self.current_to, token.start, token.end, token.char) for token in tokens: @@ -1196,5 +1223,4 @@ class SnippetParser(object): self.current_to._add_tabstop(token.no,ts) debug("ALIVE3" % ()) else: - raise RuntimeError("Never here!") Mirror(self.current_to, self.current_to._get_tabstop(self.current_to, token.no), token.start, token.end) From 6c1a82c04d4a05d184fdeefd997f91c22adc3771 Mon Sep 17 00:00:00 2001 From: Holger Rapp Date: Sun, 24 Jul 2011 13:10:57 +0200 Subject: [PATCH 06/12] Implemented Transformation parsing --- plugin/UltiSnips/TextObjects.py | 121 +++++++++++++++++++++++++------- 1 file changed, 95 insertions(+), 26 deletions(-) diff --git a/plugin/UltiSnips/TextObjects.py b/plugin/UltiSnips/TextObjects.py index 16d82ea..e546108 100644 --- a/plugin/UltiSnips/TextObjects.py +++ b/plugin/UltiSnips/TextObjects.py @@ -143,7 +143,7 @@ class _TOParser(object): def parse(self): val 
= self._v # self._v = "" - s = SnippetParser(self, val) + s = SnippetParser(self._p, val) s.parse() # text = "" @@ -1092,21 +1092,45 @@ def _parse_number(stream): return int(rv) def _parse_till_closing_brace(stream): - # TODO: document me + # TODO: document me, this also eats the closing brace rv = "" - for c in stream: - if c == '}': break # TODO: must count braces - rv += c + in_braces = 1 + while True: + if EscapeCharToken.check(stream, '{}'): + rv += stream.next() + stream.next() + else: + c = stream.next() + if c == '{': in_braces += 1 + elif c == '}': in_braces -= 1 + if in_braces == 0: + break + rv += c + return rv + + +# TODO: the functionality of some of these functions are quite +# similar. Somekind of next_matching +def _parse_till_unescaped_slash(stream): + # TODO: document me, this also eats the closing slash + rv = "" + in_braces = 1 + while True: + if EscapeCharToken.check(stream, '/'): + rv += stream.next() + stream.next() + else: + c = stream.next() + if c == '/': + break + rv += c return rv class TabStopToken(Token): + CHECK = re.compile(r'^\${\d+[:}]') + @classmethod def check(klass, stream): # TODO: bad name for function - return stream.peek(2) == '${' - - def __init__(self, gen): - Token.__init__(self, gen) + return klass.CHECK.match(stream.peek(10)) != None def _parse(self, stream): stream.next() # $ @@ -1114,10 +1138,9 @@ class TabStopToken(Token): self.no = _parse_number(stream) - self.default_text = "" if stream.peek() is ":": stream.next() - self.default_text = _parse_till_closing_brace(stream) + self.default_text = _parse_till_closing_brace(stream) debug("self.start: %s, stream.pos: %s" % (self.start, stream.pos)) def __repr__(self): @@ -1125,17 +1148,37 @@ class TabStopToken(Token): self.start, self.end, self.no, self.default_text ) +class TransformationToken(Token): + CHECK = re.compile(r'^\${\d+\/') + + @classmethod + def check(klass, stream): + # TODO: bad name for function + return klass.CHECK.match(stream.peek(10)) != None + + def _parse(self, stream): + stream.next() # $ + stream.next() # { + + self.no = _parse_number(stream) + + stream.next() # / + + self.search = _parse_till_unescaped_slash(stream) + self.replace = _parse_till_unescaped_slash(stream) + self.options = _parse_till_closing_brace(stream) + + def __repr__(self): + return "TransformationToken(%r,%r,%r,%r,%r)" % ( + self.start, self.end, self.no, self.search, self.replace + ) + class MirrorToken(Token): CHECK = re.compile(r'^\$\d+') @classmethod def check(klass, stream): # TODO: bad name for function - debug("string.peek(2: %r" % (stream.peek(2))) - rv = stream.peek(1) == '$' and stream.peek(2)[-1] in string.digits - debug("rv: %s" % (rv)) - return rv - # TODO return klass.CHECK.match(stream.peek(10)) != None def _parse(self, stream): @@ -1152,9 +1195,9 @@ class MirrorToken(Token): class EscapeCharToken(Token): @classmethod - def check(klass, stream): + def check(klass, stream, chars = '{}\$`'): cs = stream.peek(2) - if len(cs) == 2 and cs[0] == '\\' and cs[1] in '{}\$`': + if len(cs) == 2 and cs[0] == '\\' and cs[1] in chars: return True def _parse(self, stream): @@ -1162,7 +1205,7 @@ class EscapeCharToken(Token): self.char = stream.next() - # TODO: get rid of those repr maybe + # TODO: get rid of those __repr__ maybe def __repr__(self): return "EscapeCharToken(%r,%r,%r)" % ( self.start, self.end, self.char @@ -1181,21 +1224,26 @@ class ParsingMode(object): stream.next() class LiteralMode(ParsingMode): - ALLOWED_TOKENS = [ EscapeCharToken, TabStopToken, MirrorToken ] + ALLOWED_TOKENS = [ 
EscapeCharToken, TransformationToken, TabStopToken, MirrorToken ] class SnippetParser(object): def __init__(self, parent, text): debug("text: %s" % (text)) - self.current_to = parent._p + self.current_to = parent self.stream = TextIterator(text) self.mode = None - def parse(self): + def parse(self, seen_ts = None, unresolved_ts = None): tokens = list(LiteralMode().tokens(self.stream)) - seen_ts = set() + if seen_ts is None: + seen_ts = {} + if unresolved_ts is None: + unresolved_ts = set() + + unparsed_ts = [] debug("tokens: %s" % (tokens)) for token in tokens: @@ -1204,12 +1252,23 @@ class SnippetParser(object): debug("token.start: %s, token.end: %s" % (token.start, token.end)) ts = TabStop(token.no, self.current_to, token.start, token.end, token.default_text) - seen_ts.add(token.no) + seen_ts[token.no] = ts self.current_to._add_tabstop(token.no,ts) + + unparsed_ts.append(ts) elif isinstance(token, EscapeCharToken): EscapedChar(self.current_to, token.start, token.end, token.char) + elif isinstance(token, TransformationToken): + tr = Transformation(self.current_to, token.no, token.start, token.end, token.search, token.replace, token.options) + unresolved_ts.add(tr) + for ts in unparsed_ts: + debug("ts.current_text: %r" % (ts.current_text)) + k = SnippetParser(ts, ts.current_text) + k.parse(seen_ts, unresolved_ts) + # TODO: begin second phase: resolve ambiguity + # TODO: do this only once at the top level for token in tokens: if isinstance(token, MirrorToken): # TODO: maybe we can get rid of _get_tabstop and _add_tabstop @@ -1218,9 +1277,19 @@ class SnippetParser(object): ts = TabStop(token.no, self.current_to, token.start, token.end) debug("ALIVE1" % ()) - seen_ts.add(token.no) + seen_ts[token.no] = ts debug("ALIVE2" % ()) self.current_to._add_tabstop(token.no,ts) debug("ALIVE3" % ()) else: - Mirror(self.current_to, self.current_to._get_tabstop(self.current_to, token.no), token.start, token.end) + Mirror(self.current_to, seen_ts[token.no], token.start, token.end) + + # TODO: third phase: associate tabstops with Transformations + # TODO: do this only once + # TODO: this access private parts + resolved_ts = set() + for tr in unresolved_ts: + if tr._ts in seen_ts: + tr._ts = seen_ts[tr._ts] + resolved_ts.add(tr) + unresolved_ts -= resolved_ts From c5245ae69aaab50f38fa3061ff0cdb9736efda4d Mon Sep 17 00:00:00 2001 From: Holger Rapp Date: Sun, 24 Jul 2011 13:19:11 +0200 Subject: [PATCH 07/12] Implemted ShellCode again --- plugin/UltiSnips/TextObjects.py | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/plugin/UltiSnips/TextObjects.py b/plugin/UltiSnips/TextObjects.py index e546108..da7a853 100644 --- a/plugin/UltiSnips/TextObjects.py +++ b/plugin/UltiSnips/TextObjects.py @@ -1110,16 +1110,16 @@ def _parse_till_closing_brace(stream): # TODO: the functionality of some of these functions are quite # similar. 
Somekind of next_matching -def _parse_till_unescaped_slash(stream): +def _parse_till_unescaped_char(stream, char): # TODO: document me, this also eats the closing slash rv = "" in_braces = 1 while True: - if EscapeCharToken.check(stream, '/'): + if EscapeCharToken.check(stream, char): rv += stream.next() + stream.next() else: c = stream.next() - if c == '/': + if c == char: break rv += c return rv @@ -1164,8 +1164,8 @@ class TransformationToken(Token): stream.next() # / - self.search = _parse_till_unescaped_slash(stream) - self.replace = _parse_till_unescaped_slash(stream) + self.search = _parse_till_unescaped_char(stream, '/') + self.replace = _parse_till_unescaped_char(stream, '/') self.options = _parse_till_closing_brace(stream) def __repr__(self): @@ -1211,6 +1211,21 @@ class EscapeCharToken(Token): self.start, self.end, self.char ) +class ShellCodeToken(Token): + @classmethod + def check(klass, stream): + return stream.peek(1) == '`' + + def _parse(self, stream): + stream.next() # ` + self.content = _parse_till_unescaped_char(stream, '`') + + # TODO: get rid of those __repr__ maybe + def __repr__(self): + return "ShellCodeToken(%r,%r,%r)" % ( + self.start, self.end, self.content + ) + class ParsingMode(object): def tokens(self, stream): while True: @@ -1224,7 +1239,7 @@ class ParsingMode(object): stream.next() class LiteralMode(ParsingMode): - ALLOWED_TOKENS = [ EscapeCharToken, TransformationToken, TabStopToken, MirrorToken ] + ALLOWED_TOKENS = [ EscapeCharToken, TransformationToken, TabStopToken, MirrorToken, ShellCodeToken ] class SnippetParser(object): @@ -1261,6 +1276,8 @@ class SnippetParser(object): elif isinstance(token, TransformationToken): tr = Transformation(self.current_to, token.no, token.start, token.end, token.search, token.replace, token.options) unresolved_ts.add(tr) + elif isinstance(token, ShellCodeToken): + ShellCode(self.current_to, token.start, token.end, token.content) for ts in unparsed_ts: debug("ts.current_text: %r" % (ts.current_text)) From ac619f0bd2e1142740478c037b381642b714cf2f Mon Sep 17 00:00:00 2001 From: Holger Rapp Date: Sun, 24 Jul 2011 17:01:03 +0200 Subject: [PATCH 08/12] All tests pass again. 
Only beautifying still to do --- plugin/UltiSnips/TextObjects.py | 164 ++++++++++++++++++++++---------- 1 file changed, 113 insertions(+), 51 deletions(-) diff --git a/plugin/UltiSnips/TextObjects.py b/plugin/UltiSnips/TextObjects.py index da7a853..bc4e688 100644 --- a/plugin/UltiSnips/TextObjects.py +++ b/plugin/UltiSnips/TextObjects.py @@ -144,7 +144,7 @@ class _TOParser(object): val = self._v # self._v = "" s = SnippetParser(self._p, val) - s.parse() + s.parse(self._indent) # text = "" @@ -1077,9 +1077,9 @@ class TextIterator(object): return self._idx >= len(self._text) class Token(object): - def __init__(self, gen): + def __init__(self, gen, indent): self.start = gen.pos - self._parse(gen) + self._parse(gen, indent) self.end = gen.pos @@ -1132,7 +1132,7 @@ class TabStopToken(Token): # TODO: bad name for function return klass.CHECK.match(stream.peek(10)) != None - def _parse(self, stream): + def _parse(self, stream, indent): stream.next() # $ stream.next() # { @@ -1156,7 +1156,7 @@ class TransformationToken(Token): # TODO: bad name for function return klass.CHECK.match(stream.peek(10)) != None - def _parse(self, stream): + def _parse(self, stream, indent): stream.next() # $ stream.next() # { @@ -1181,7 +1181,7 @@ class MirrorToken(Token): # TODO: bad name for function return klass.CHECK.match(stream.peek(10)) != None - def _parse(self, stream): + def _parse(self, stream, indent): self.no = "" stream.next() # $ while not stream.exhausted and stream.peek() in string.digits: @@ -1200,7 +1200,7 @@ class EscapeCharToken(Token): if len(cs) == 2 and cs[0] == '\\' and cs[1] in chars: return True - def _parse(self, stream): + def _parse(self, stream, indent): stream.next() # \ self.char = stream.next() @@ -1216,7 +1216,7 @@ class ShellCodeToken(Token): def check(klass, stream): return stream.peek(1) == '`' - def _parse(self, stream): + def _parse(self, stream, indent): stream.next() # ` self.content = _parse_till_unescaped_char(stream, '`') @@ -1226,20 +1226,76 @@ class ShellCodeToken(Token): self.start, self.end, self.content ) + +# TODO: identical to VimLCodeToken +class PythonCodeToken(Token): + CHECK = re.compile(r'^`!p\s') + + @classmethod + def check(klass, stream): + return klass.CHECK.match(stream.peek(4)) is not None + + def _parse(self, stream, indent): + for i in range(3): + stream.next() # `!p + if stream.peek() in '\t ': + stream.next() + + content = _parse_till_unescaped_char(stream, '`') + + # TODO: stupid to pass the indent down even if only python + # needs it. Stupid to indent beforehand. 
+ + debug("indent: %r" % (indent)) + # Strip the indent if any + if len(indent): + lines = content.splitlines() + self.content = lines[0] + '\n' + self.content += '\n'.join([l[len(indent):] + for l in lines[1:]]) + else: + self.content = content + self.indent = indent + + # TODO: get rid of those __repr__ maybe + def __repr__(self): + return "PythonCodeToken(%r,%r,%r)" % ( + self.start, self.end, self.content + ) + + +class VimLCodeToken(Token): + CHECK = re.compile(r'^`!v\s') + + @classmethod + def check(klass, stream): + return klass.CHECK.match(stream.peek(4)) is not None + + def _parse(self, stream, indent): + for i in range(4): + stream.next() # `!v + self.content = _parse_till_unescaped_char(stream, '`') + + # TODO: get rid of those __repr__ maybe + def __repr__(self): + return "VimLCodeToken(%r,%r,%r)" % ( + self.start, self.end, self.content + ) + class ParsingMode(object): - def tokens(self, stream): + def tokens(self, stream, indent): while True: done_something = False for t in self.ALLOWED_TOKENS: if t.check(stream): - yield t(stream) + yield t(stream, indent) done_something = True break if not done_something: stream.next() class LiteralMode(ParsingMode): - ALLOWED_TOKENS = [ EscapeCharToken, TransformationToken, TabStopToken, MirrorToken, ShellCodeToken ] + ALLOWED_TOKENS = [ EscapeCharToken, TransformationToken, TabStopToken, MirrorToken, PythonCodeToken, VimLCodeToken, ShellCodeToken ] class SnippetParser(object): @@ -1250,18 +1306,49 @@ class SnippetParser(object): self.mode = None - def parse(self, seen_ts = None, unresolved_ts = None): - tokens = list(LiteralMode().tokens(self.stream)) + def parse(self, indent): - if seen_ts is None: - seen_ts = {} - if unresolved_ts is None: - unresolved_ts = set() + seen_ts = {} + dangling_references = set() + tokens = [] - unparsed_ts = [] + self._parse(indent, tokens, seen_ts, dangling_references) + + debug("all tokens: %s" % (tokens)) + debug("seen_ts: %s" % (seen_ts)) + debug("dangling_references: %s" % (dangling_references)) + # TODO: begin second phase: resolve ambiguity + # TODO: do this only once at the top level + for parent, token in tokens: + if isinstance(token, MirrorToken): + # TODO: maybe we can get rid of _get_tabstop and _add_tabstop + if token.no not in seen_ts: + debug("token.start: %s, token.end: %s" % (token.start, token.end)) + ts = TabStop(token.no, parent, token.start, token.end) + seen_ts[token.no] = ts + parent._add_tabstop(token.no,ts) + else: + Mirror(parent, seen_ts[token.no], token.start, token.end) + + # TODO: third phase: associate tabstops with Transformations + # TODO: do this only once + # TODO: this access private parts + resolved_ts = set() + for tr in dangling_references: + if tr._ts in seen_ts: + tr._ts = seen_ts[tr._ts] + resolved_ts.add(tr) + + # TODO: check if all associations have been done properly. Also add a testcase for this! + dangling_references -= resolved_ts + + def _parse(self, indent, all_tokens, seen_ts, dangling_references): + tokens = list(LiteralMode().tokens(self.stream, indent)) debug("tokens: %s" % (tokens)) for token in tokens: + all_tokens.append((self.current_to, token)) + if isinstance(token, TabStopToken): # TODO: could also take the token directly debug("token.start: %s, token.end: %s" % (token.start, token.end)) @@ -1270,43 +1357,18 @@ class SnippetParser(object): seen_ts[token.no] = ts self.current_to._add_tabstop(token.no,ts) - unparsed_ts.append(ts) + # TODO: can't parsing be done here directly? 
+ k = SnippetParser(ts, ts.current_text) + k._parse(indent, all_tokens, seen_ts, dangling_references) elif isinstance(token, EscapeCharToken): EscapedChar(self.current_to, token.start, token.end, token.char) elif isinstance(token, TransformationToken): tr = Transformation(self.current_to, token.no, token.start, token.end, token.search, token.replace, token.options) - unresolved_ts.add(tr) + dangling_references.add(tr) elif isinstance(token, ShellCodeToken): ShellCode(self.current_to, token.start, token.end, token.content) + elif isinstance(token, PythonCodeToken): + PythonCode(self.current_to, token.start, token.end, token.content, token.indent) + elif isinstance(token, VimLCodeToken): + VimLCode(self.current_to, token.start, token.end, token.content) - for ts in unparsed_ts: - debug("ts.current_text: %r" % (ts.current_text)) - k = SnippetParser(ts, ts.current_text) - k.parse(seen_ts, unresolved_ts) - - # TODO: begin second phase: resolve ambiguity - # TODO: do this only once at the top level - for token in tokens: - if isinstance(token, MirrorToken): - # TODO: maybe we can get rid of _get_tabstop and _add_tabstop - if token.no not in seen_ts: - debug("token.start: %s, token.end: %s" % (token.start, token.end)) - ts = TabStop(token.no, self.current_to, - token.start, token.end) - debug("ALIVE1" % ()) - seen_ts[token.no] = ts - debug("ALIVE2" % ()) - self.current_to._add_tabstop(token.no,ts) - debug("ALIVE3" % ()) - else: - Mirror(self.current_to, seen_ts[token.no], token.start, token.end) - - # TODO: third phase: associate tabstops with Transformations - # TODO: do this only once - # TODO: this access private parts - resolved_ts = set() - for tr in unresolved_ts: - if tr._ts in seen_ts: - tr._ts = seen_ts[tr._ts] - resolved_ts.add(tr) - unresolved_ts -= resolved_ts From ad0059bc2de6a941a8a3dd44568304cdc4116e0a Mon Sep 17 00:00:00 2001 From: Holger Rapp Date: Sun, 24 Jul 2011 17:30:29 +0200 Subject: [PATCH 09/12] Some refactoring --- plugin/UltiSnips/Lexer.py | 284 +++++++++++++++ plugin/UltiSnips/TextObjects.py | 618 +------------------------------- 2 files changed, 302 insertions(+), 600 deletions(-) create mode 100644 plugin/UltiSnips/Lexer.py diff --git a/plugin/UltiSnips/Lexer.py b/plugin/UltiSnips/Lexer.py new file mode 100644 index 0000000..aa44a2b --- /dev/null +++ b/plugin/UltiSnips/Lexer.py @@ -0,0 +1,284 @@ +#!/usr/bin/env python +# encoding: utf-8 + +""" +Not really a Lexer in the classical sense, but code to hack Snippet Definitions +into Logical Units called Tokens. +""" + +import string +import re + +from Geometry import Position + +# TODO: review this file + +# Helper Classes {{{ +class _TextIterator(object): + def __init__(self, text): + self._text = text + self._line = 0 + self._col = 0 + + self._idx = 0 + + def __iter__(self): + return self + + def next(self): + if self._idx >= len(self._text): + raise StopIteration + + rv = self._text[self._idx] + if self._text[self._idx] in ('\n', '\r\n'): + self._line += 1 + self._col = 0 + else: + self._col += 1 + self._idx += 1 + + return rv + + def peek(self, count = 1): + try: + return self._text[self._idx:self._idx + count] + except IndexError: + return None + + @property + def idx(self): + return self._idx # TODO: does this need to be exposed? 
+ + @property + def pos(self): + return Position(self._line, self._col) + + @property + def exhausted(self): + return self._idx >= len(self._text) +# End: Helper Classes }}} +# Helper functions {{{ +def _parse_number(stream): + # TODO: document me + rv = "" + while stream.peek() in string.digits: + rv += stream.next() + + return int(rv) + +def _parse_till_closing_brace(stream): + # TODO: document me, this also eats the closing brace + rv = "" + in_braces = 1 + while True: + if EscapeCharToken.check(stream, '{}'): + rv += stream.next() + stream.next() + else: + c = stream.next() + if c == '{': in_braces += 1 + elif c == '}': in_braces -= 1 + if in_braces == 0: + break + rv += c + return rv + + +# TODO: the functionality of some of these functions are quite +# similar. Somekind of next_matching +def _parse_till_unescaped_char(stream, char): + # TODO: document me, this also eats the closing slash + rv = "" + in_braces = 1 + while True: + if EscapeCharToken.check(stream, char): + rv += stream.next() + stream.next() + else: + c = stream.next() + if c == char: + break + rv += c + return rv +# End: Helper functions }}} + +# Tokens {{{ +class Token(object): + def __init__(self, gen, indent): + self.start = gen.pos + self._parse(gen, indent) + self.end = gen.pos + +class TabStopToken(Token): + CHECK = re.compile(r'^\${\d+[:}]') + + @classmethod + def check(klass, stream): + # TODO: bad name for function + return klass.CHECK.match(stream.peek(10)) != None + + def _parse(self, stream, indent): + stream.next() # $ + stream.next() # { + + self.no = _parse_number(stream) + + if stream.peek() is ":": + stream.next() + self.default_text = _parse_till_closing_brace(stream) + + def __repr__(self): + return "TabStopToken(%r,%r,%r,%r)" % ( + self.start, self.end, self.no, self.default_text + ) + +class TransformationToken(Token): + CHECK = re.compile(r'^\${\d+\/') + + @classmethod + def check(klass, stream): + # TODO: bad name for function + return klass.CHECK.match(stream.peek(10)) != None + + def _parse(self, stream, indent): + stream.next() # $ + stream.next() # { + + self.no = _parse_number(stream) + + stream.next() # / + + self.search = _parse_till_unescaped_char(stream, '/') + self.replace = _parse_till_unescaped_char(stream, '/') + self.options = _parse_till_closing_brace(stream) + + def __repr__(self): + return "TransformationToken(%r,%r,%r,%r,%r)" % ( + self.start, self.end, self.no, self.search, self.replace + ) + +class MirrorToken(Token): + CHECK = re.compile(r'^\$\d+') + + @classmethod + def check(klass, stream): + # TODO: bad name for function + return klass.CHECK.match(stream.peek(10)) != None + + def _parse(self, stream, indent): + self.no = "" + stream.next() # $ + while not stream.exhausted and stream.peek() in string.digits: + self.no += stream.next() + self.no = int(self.no) + + def __repr__(self): + return "MirrorToken(%r,%r,%r)" % ( + self.start, self.end, self.no + ) + +class EscapeCharToken(Token): + @classmethod + def check(klass, stream, chars = '{}\$`'): + cs = stream.peek(2) + if len(cs) == 2 and cs[0] == '\\' and cs[1] in chars: + return True + + def _parse(self, stream, indent): + stream.next() # \ + self.char = stream.next() + + + # TODO: get rid of those __repr__ maybe + def __repr__(self): + return "EscapeCharToken(%r,%r,%r)" % ( + self.start, self.end, self.char + ) + +class ShellCodeToken(Token): + @classmethod + def check(klass, stream): + return stream.peek(1) == '`' + + def _parse(self, stream, indent): + stream.next() # ` + self.content = 
_parse_till_unescaped_char(stream, '`') + + # TODO: get rid of those __repr__ maybe + def __repr__(self): + return "ShellCodeToken(%r,%r,%r)" % ( + self.start, self.end, self.content + ) + +# TODO: identical to VimLCodeToken +class PythonCodeToken(Token): + CHECK = re.compile(r'^`!p\s') + + @classmethod + def check(klass, stream): + return klass.CHECK.match(stream.peek(4)) is not None + + def _parse(self, stream, indent): + for i in range(3): + stream.next() # `!p + if stream.peek() in '\t ': + stream.next() + + content = _parse_till_unescaped_char(stream, '`') + + # TODO: stupid to pass the indent down even if only python + # needs it. Stupid to indent beforehand. + + # Strip the indent if any + if len(indent): + lines = content.splitlines() + self.content = lines[0] + '\n' + self.content += '\n'.join([l[len(indent):] + for l in lines[1:]]) + else: + self.content = content + self.indent = indent + + # TODO: get rid of those __repr__ maybe + def __repr__(self): + return "PythonCodeToken(%r,%r,%r)" % ( + self.start, self.end, self.content + ) + + +class VimLCodeToken(Token): + CHECK = re.compile(r'^`!v\s') + + @classmethod + def check(klass, stream): + return klass.CHECK.match(stream.peek(4)) is not None + + def _parse(self, stream, indent): + for i in range(4): + stream.next() # `!v + self.content = _parse_till_unescaped_char(stream, '`') + + # TODO: get rid of those __repr__ maybe + def __repr__(self): + return "VimLCodeToken(%r,%r,%r)" % ( + self.start, self.end, self.content + ) +# End: Tokens }}} + +__ALLOWED_TOKENS = [ + EscapeCharToken, TransformationToken, TabStopToken, MirrorToken, + PythonCodeToken, VimLCodeToken, ShellCodeToken +] + +def tokenize(text, indent): + stream = _TextIterator(text) + + while True: + done_something = False + for t in __ALLOWED_TOKENS: + if t.check(stream): + yield t(stream, indent) + done_something = True + break + if not done_something: + stream.next() + + diff --git a/plugin/UltiSnips/TextObjects.py b/plugin/UltiSnips/TextObjects.py index bc4e688..559af5d 100644 --- a/plugin/UltiSnips/TextObjects.py +++ b/plugin/UltiSnips/TextObjects.py @@ -112,315 +112,10 @@ class _CleverReplace(object): return self._unescape(tv.decode("string-escape")) -class _TOParser(object): - # A simple tabstop with default value - _TABSTOP = re.compile(r'''(?= len(self._text): - raise StopIteration - - rv = self._text[self._idx] - if self._text[self._idx] in ('\n', '\r\n'): - self._line += 1 - self._col = 0 - else: - self._col += 1 - self._idx += 1 - - return rv - - def peek(self, count = 1): - try: - return self._text[self._idx:self._idx + count] - except IndexError: - return None - - @property - def idx(self): - return self._idx # TODO: does this need to be exposed? 
- - @property - def pos(self): - return Position(self._line, self._col) - - @property - def exhausted(self): - return self._idx >= len(self._text) - -class Token(object): - def __init__(self, gen, indent): - self.start = gen.pos - self._parse(gen, indent) - self.end = gen.pos - - -def _parse_number(stream): - # TODO: document me - rv = "" - while stream.peek() in string.digits: - rv += stream.next() - - return int(rv) - -def _parse_till_closing_brace(stream): - # TODO: document me, this also eats the closing brace - rv = "" - in_braces = 1 - while True: - if EscapeCharToken.check(stream, '{}'): - rv += stream.next() + stream.next() - else: - c = stream.next() - if c == '{': in_braces += 1 - elif c == '}': in_braces -= 1 - if in_braces == 0: - break - rv += c - return rv - - -# TODO: the functionality of some of these functions are quite -# similar. Somekind of next_matching -def _parse_till_unescaped_char(stream, char): - # TODO: document me, this also eats the closing slash - rv = "" - in_braces = 1 - while True: - if EscapeCharToken.check(stream, char): - rv += stream.next() + stream.next() - else: - c = stream.next() - if c == char: - break - rv += c - return rv - -class TabStopToken(Token): - CHECK = re.compile(r'^\${\d+[:}]') - - @classmethod - def check(klass, stream): - # TODO: bad name for function - return klass.CHECK.match(stream.peek(10)) != None - - def _parse(self, stream, indent): - stream.next() # $ - stream.next() # { - - self.no = _parse_number(stream) - - if stream.peek() is ":": - stream.next() - self.default_text = _parse_till_closing_brace(stream) - debug("self.start: %s, stream.pos: %s" % (self.start, stream.pos)) - - def __repr__(self): - return "TabStopToken(%r,%r,%r,%r)" % ( - self.start, self.end, self.no, self.default_text - ) - -class TransformationToken(Token): - CHECK = re.compile(r'^\${\d+\/') - - @classmethod - def check(klass, stream): - # TODO: bad name for function - return klass.CHECK.match(stream.peek(10)) != None - - def _parse(self, stream, indent): - stream.next() # $ - stream.next() # { - - self.no = _parse_number(stream) - - stream.next() # / - - self.search = _parse_till_unescaped_char(stream, '/') - self.replace = _parse_till_unescaped_char(stream, '/') - self.options = _parse_till_closing_brace(stream) - - def __repr__(self): - return "TransformationToken(%r,%r,%r,%r,%r)" % ( - self.start, self.end, self.no, self.search, self.replace - ) - -class MirrorToken(Token): - CHECK = re.compile(r'^\$\d+') - - @classmethod - def check(klass, stream): - # TODO: bad name for function - return klass.CHECK.match(stream.peek(10)) != None - - def _parse(self, stream, indent): - self.no = "" - stream.next() # $ - while not stream.exhausted and stream.peek() in string.digits: - self.no += stream.next() - self.no = int(self.no) - - def __repr__(self): - return "MirrorToken(%r,%r,%r)" % ( - self.start, self.end, self.no - ) - -class EscapeCharToken(Token): - @classmethod - def check(klass, stream, chars = '{}\$`'): - cs = stream.peek(2) - if len(cs) == 2 and cs[0] == '\\' and cs[1] in chars: - return True - - def _parse(self, stream, indent): - stream.next() # \ - self.char = stream.next() - - - # TODO: get rid of those __repr__ maybe - def __repr__(self): - return "EscapeCharToken(%r,%r,%r)" % ( - self.start, self.end, self.char - ) - -class ShellCodeToken(Token): - @classmethod - def check(klass, stream): - return stream.peek(1) == '`' - - def _parse(self, stream, indent): - stream.next() # ` - self.content = _parse_till_unescaped_char(stream, '`') - - # TODO: 
get rid of those __repr__ maybe - def __repr__(self): - return "ShellCodeToken(%r,%r,%r)" % ( - self.start, self.end, self.content - ) - - -# TODO: identical to VimLCodeToken -class PythonCodeToken(Token): - CHECK = re.compile(r'^`!p\s') - - @classmethod - def check(klass, stream): - return klass.CHECK.match(stream.peek(4)) is not None - - def _parse(self, stream, indent): - for i in range(3): - stream.next() # `!p - if stream.peek() in '\t ': - stream.next() - - content = _parse_till_unescaped_char(stream, '`') - - # TODO: stupid to pass the indent down even if only python - # needs it. Stupid to indent beforehand. - - debug("indent: %r" % (indent)) - # Strip the indent if any - if len(indent): - lines = content.splitlines() - self.content = lines[0] + '\n' - self.content += '\n'.join([l[len(indent):] - for l in lines[1:]]) - else: - self.content = content - self.indent = indent - - # TODO: get rid of those __repr__ maybe - def __repr__(self): - return "PythonCodeToken(%r,%r,%r)" % ( - self.start, self.end, self.content - ) - - -class VimLCodeToken(Token): - CHECK = re.compile(r'^`!v\s') - - @classmethod - def check(klass, stream): - return klass.CHECK.match(stream.peek(4)) is not None - - def _parse(self, stream, indent): - for i in range(4): - stream.next() # `!v - self.content = _parse_till_unescaped_char(stream, '`') - - # TODO: get rid of those __repr__ maybe - def __repr__(self): - return "VimLCodeToken(%r,%r,%r)" % ( - self.start, self.end, self.content - ) - -class ParsingMode(object): - def tokens(self, stream, indent): - while True: - done_something = False - for t in self.ALLOWED_TOKENS: - if t.check(stream): - yield t(stream, indent) - done_something = True - break - if not done_something: - stream.next() - -class LiteralMode(ParsingMode): - ALLOWED_TOKENS = [ EscapeCharToken, TransformationToken, TabStopToken, MirrorToken, PythonCodeToken, VimLCodeToken, ShellCodeToken ] - - -class SnippetParser(object): - def __init__(self, parent, text): - debug("text: %s" % (text)) +class _TOParser(object): + def __init__(self, parent, text, indent): + self._indent = indent self.current_to = parent - self.stream = TextIterator(text) - self.mode = None - - - def parse(self, indent): + self.text = text + def parse(self): seen_ts = {} - dangling_references = set() tokens = [] - self._parse(indent, tokens, seen_ts, dangling_references) + self._parse(tokens, seen_ts) debug("all tokens: %s" % (tokens)) debug("seen_ts: %s" % (seen_ts)) - debug("dangling_references: %s" % (dangling_references)) # TODO: begin second phase: resolve ambiguity # TODO: do this only once at the top level for parent, token in tokens: @@ -1331,19 +757,15 @@ class SnippetParser(object): Mirror(parent, seen_ts[token.no], token.start, token.end) # TODO: third phase: associate tabstops with Transformations - # TODO: do this only once - # TODO: this access private parts - resolved_ts = set() - for tr in dangling_references: - if tr._ts in seen_ts: - tr._ts = seen_ts[tr._ts] - resolved_ts.add(tr) - + for parent, token in tokens: + if isinstance(token, TransformationToken): + if token.no not in seen_ts: + raise RuntimeError("Tabstop %i is not known" % t._ts) + Transformation(parent, seen_ts[token.no], token.start, token.end, token.search, token.replace, token.options) # TODO: check if all associations have been done properly. Also add a testcase for this! 
- dangling_references -= resolved_ts - def _parse(self, indent, all_tokens, seen_ts, dangling_references): - tokens = list(LiteralMode().tokens(self.stream, indent)) + def _parse(self, all_tokens, seen_ts): + tokens = list(tokenize(self.text, self._indent)) debug("tokens: %s" % (tokens)) for token in tokens: @@ -1357,14 +779,10 @@ class SnippetParser(object): seen_ts[token.no] = ts self.current_to._add_tabstop(token.no,ts) - # TODO: can't parsing be done here directly? - k = SnippetParser(ts, ts.current_text) - k._parse(indent, all_tokens, seen_ts, dangling_references) + k = _TOParser(ts, ts.current_text, self._indent) + k._parse(all_tokens, seen_ts) elif isinstance(token, EscapeCharToken): EscapedChar(self.current_to, token.start, token.end, token.char) - elif isinstance(token, TransformationToken): - tr = Transformation(self.current_to, token.no, token.start, token.end, token.search, token.replace, token.options) - dangling_references.add(tr) elif isinstance(token, ShellCodeToken): ShellCode(self.current_to, token.start, token.end, token.content) elif isinstance(token, PythonCodeToken): From 9608346e776d69dd06b8be3a92edac147646e4e3 Mon Sep 17 00:00:00 2001 From: Holger Rapp Date: Sun, 24 Jul 2011 18:29:31 +0200 Subject: [PATCH 10/12] TextObjects now accept a single token as initializer --- plugin/UltiSnips/Lexer.py | 33 ++++++------ plugin/UltiSnips/TextObjects.py | 93 +++++++++++++++++---------------- 2 files changed, 64 insertions(+), 62 deletions(-) diff --git a/plugin/UltiSnips/Lexer.py b/plugin/UltiSnips/Lexer.py index aa44a2b..081de6b 100644 --- a/plugin/UltiSnips/Lexer.py +++ b/plugin/UltiSnips/Lexer.py @@ -103,6 +103,7 @@ def _parse_till_unescaped_char(stream, char): # Tokens {{{ class Token(object): def __init__(self, gen, indent): + self.initial_text = "" self.start = gen.pos self._parse(gen, indent) self.end = gen.pos @@ -123,11 +124,11 @@ class TabStopToken(Token): if stream.peek() is ":": stream.next() - self.default_text = _parse_till_closing_brace(stream) + self.initial_text = _parse_till_closing_brace(stream) def __repr__(self): return "TabStopToken(%r,%r,%r,%r)" % ( - self.start, self.end, self.no, self.default_text + self.start, self.end, self.no, self.initial_text ) class TransformationToken(Token): @@ -164,6 +165,7 @@ class MirrorToken(Token): return klass.CHECK.match(stream.peek(10)) != None def _parse(self, stream, indent): + # TODO: why not parse number? 
self.no = "" stream.next() # $ while not stream.exhausted and stream.peek() in string.digits: @@ -184,13 +186,11 @@ class EscapeCharToken(Token): def _parse(self, stream, indent): stream.next() # \ - self.char = stream.next() + self.initial_text = stream.next() - - # TODO: get rid of those __repr__ maybe def __repr__(self): return "EscapeCharToken(%r,%r,%r)" % ( - self.start, self.end, self.char + self.start, self.end, self.initial_text ) class ShellCodeToken(Token): @@ -200,12 +200,12 @@ class ShellCodeToken(Token): def _parse(self, stream, indent): stream.next() # ` - self.content = _parse_till_unescaped_char(stream, '`') + self.code = _parse_till_unescaped_char(stream, '`') # TODO: get rid of those __repr__ maybe def __repr__(self): return "ShellCodeToken(%r,%r,%r)" % ( - self.start, self.end, self.content + self.start, self.end, self.code ) # TODO: identical to VimLCodeToken @@ -222,25 +222,25 @@ class PythonCodeToken(Token): if stream.peek() in '\t ': stream.next() - content = _parse_till_unescaped_char(stream, '`') + code = _parse_till_unescaped_char(stream, '`') # TODO: stupid to pass the indent down even if only python # needs it. Stupid to indent beforehand. # Strip the indent if any if len(indent): - lines = content.splitlines() - self.content = lines[0] + '\n' - self.content += '\n'.join([l[len(indent):] + lines = code.splitlines() + self.code = lines[0] + '\n' + self.code += '\n'.join([l[len(indent):] for l in lines[1:]]) else: - self.content = content + self.code = code self.indent = indent # TODO: get rid of those __repr__ maybe def __repr__(self): return "PythonCodeToken(%r,%r,%r)" % ( - self.start, self.end, self.content + self.start, self.end, self.code ) @@ -254,12 +254,11 @@ class VimLCodeToken(Token): def _parse(self, stream, indent): for i in range(4): stream.next() # `!v - self.content = _parse_till_unescaped_char(stream, '`') + self.code = _parse_till_unescaped_char(stream, '`') - # TODO: get rid of those __repr__ maybe def __repr__(self): return "VimLCodeToken(%r,%r,%r)" % ( - self.start, self.end, self.content + self.start, self.end, self.code ) # End: Tokens }}} diff --git a/plugin/UltiSnips/TextObjects.py b/plugin/UltiSnips/TextObjects.py index 559af5d..054b50f 100644 --- a/plugin/UltiSnips/TextObjects.py +++ b/plugin/UltiSnips/TextObjects.py @@ -121,26 +121,29 @@ class TextObject(object): This base class represents any object in the text that has a span in any ways """ - def __init__(self, parent, start, end, initial_text): - self._start = start - self._end = end - + def __init__(self, parent, token, end = None, initial_text = ""): self._parent = parent + if end is not None: # Took 4 arguments + self._start = token + self._end = end + self._current_text = TextBuffer(initial_text) + else: # Initialize from token + self._start = token.start + self._end = token.end + self._current_text = TextBuffer(token.initial_text) + self._childs = [] self._tabstops = {} if parent is not None: parent._add_child(self) - self._current_text = TextBuffer(initial_text) - self._cts = 0 def __cmp__(self, other): return cmp(self._start, other._start) - ############## # PROPERTIES # ############## @@ -268,7 +271,6 @@ class TextObject(object): return max(posible_sol) - ############################### # Private/Protected functions # ############################### @@ -328,7 +330,6 @@ class EscapedChar(TextObject): """ pass - class StartMarker(TextObject): """ This class only remembers it's starting position. 
It is used to @@ -337,15 +338,15 @@ class StartMarker(TextObject): """ def __init__(self, start): end = Position(start.line, start.col) - TextObject.__init__(self, None, start, end, "") + TextObject.__init__(self, None, start, end) class Mirror(TextObject): """ A Mirror object mirrors a TabStop that is, text is repeated here """ - def __init__(self, parent, ts, start, end): - TextObject.__init__(self, parent, start, end, "") + def __init__(self, parent, ts, token): + TextObject.__init__(self, parent, token) self._ts = ts @@ -357,19 +358,19 @@ class Mirror(TextObject): class Transformation(Mirror): - def __init__(self, parent, ts, start, end, s, r, options): - Mirror.__init__(self, parent, ts, start, end) + def __init__(self, parent, ts, token): + Mirror.__init__(self, parent, ts, token) flags = 0 self._match_this_many = 1 - if options: - if "g" in options: + if token.options: + if "g" in token.options: self._match_this_many = 0 - if "i" in options: + if "i" in token.options: flags |= re.IGNORECASE - self._find = re.compile(s, flags | re.DOTALL) - self._replace = _CleverReplace(r) + self._find = re.compile(token.search, flags | re.DOTALL) + self._replace = _CleverReplace(token.replace) def _do_update(self): t = self._ts.current_text @@ -380,9 +381,8 @@ class Transformation(Mirror): return "Transformation(%s -> %s)" % (self._start, self._end) class ShellCode(TextObject): - def __init__(self, parent, start, end, code): - - code = code.replace("\\`", "`") + def __init__(self, parent, token): + code = token.code.replace("\\`", "`") # Write the code to a temporary file handle, path = tempfile.mkstemp(text=True) @@ -401,16 +401,17 @@ class ShellCode(TextObject): os.unlink(path) - TextObject.__init__(self, parent, start, end, output) + token.initial_text = output + TextObject.__init__(self, parent, token) def __repr__(self): return "ShellCode(%s -> %s)" % (self._start, self._end) class VimLCode(TextObject): - def __init__(self, parent, start, end, code): - self._code = code.replace("\\`", "`").strip() + def __init__(self, parent, token): + self._code = token.code.replace("\\`", "`").strip() - TextObject.__init__(self, parent, start, end, "") + TextObject.__init__(self, parent, token) def _do_update(self): self.current_text = str(vim.eval(self._code)) @@ -567,9 +568,9 @@ class SnippetUtil(object): class PythonCode(TextObject): - def __init__(self, parent, start, end, code, indent=""): + def __init__(self, parent, token): - code = code.replace("\\`", "`") + code = token.code.replace("\\`", "`") # Find our containing snippet for snippet local data snippet = parent @@ -578,7 +579,7 @@ class PythonCode(TextObject): snippet = snippet._parent except AttributeError: snippet = None - self._snip = SnippetUtil(indent) + self._snip = SnippetUtil(token.indent) self._locals = snippet.locals self._globals = {} @@ -588,7 +589,7 @@ class PythonCode(TextObject): # Add Some convenience to the code self._code = "import re, os, vim, string, random\n" + code - TextObject.__init__(self, parent, start, end, "") + TextObject.__init__(self, parent, token) def _do_update(self): @@ -626,9 +627,13 @@ class TabStop(TextObject): This is the most important TextObject. A TabStop is were the cursor comes to rest when the user taps through the Snippet. 
""" - def __init__(self, no, parent, start, end, default_text = ""): - TextObject.__init__(self, parent, start, end, default_text) - self._no = no + def __init__(self, parent, token, start = None, end = None): + if start is not None: + self._no = token + TextObject.__init__(self, parent, start, end) + else: + TextObject.__init__(self, parent, token) + self._no = token.no def no(self): return self._no @@ -675,7 +680,7 @@ class SnippetInstance(TextObject): col -= self.start.col start = Position(delta.line, col) end = Position(delta.line, col) - ts = TabStop(0, self, start, end, "") + ts = TabStop(self, 0, start, end) self._add_tabstop(0,ts) self.update() @@ -734,6 +739,7 @@ class _TOParser(object): self._indent = indent self.current_to = parent self.text = text + debug("text: %s" % (text)) def parse(self): seen_ts = {} @@ -750,18 +756,18 @@ class _TOParser(object): # TODO: maybe we can get rid of _get_tabstop and _add_tabstop if token.no not in seen_ts: debug("token.start: %s, token.end: %s" % (token.start, token.end)) - ts = TabStop(token.no, parent, token.start, token.end) + ts = TabStop(parent, token) seen_ts[token.no] = ts parent._add_tabstop(token.no,ts) else: - Mirror(parent, seen_ts[token.no], token.start, token.end) + Mirror(parent, seen_ts[token.no], token) # TODO: third phase: associate tabstops with Transformations for parent, token in tokens: if isinstance(token, TransformationToken): if token.no not in seen_ts: raise RuntimeError("Tabstop %i is not known" % t._ts) - Transformation(parent, seen_ts[token.no], token.start, token.end, token.search, token.replace, token.options) + Transformation(parent, seen_ts[token.no], token) # TODO: check if all associations have been done properly. Also add a testcase for this! def _parse(self, all_tokens, seen_ts): @@ -772,21 +778,18 @@ class _TOParser(object): all_tokens.append((self.current_to, token)) if isinstance(token, TabStopToken): - # TODO: could also take the token directly - debug("token.start: %s, token.end: %s" % (token.start, token.end)) - ts = TabStop(token.no, self.current_to, - token.start, token.end, token.default_text) + ts = TabStop(self.current_to, token) seen_ts[token.no] = ts self.current_to._add_tabstop(token.no,ts) k = _TOParser(ts, ts.current_text, self._indent) k._parse(all_tokens, seen_ts) elif isinstance(token, EscapeCharToken): - EscapedChar(self.current_to, token.start, token.end, token.char) + EscapedChar(self.current_to, token) elif isinstance(token, ShellCodeToken): - ShellCode(self.current_to, token.start, token.end, token.content) + ShellCode(self.current_to, token) elif isinstance(token, PythonCodeToken): - PythonCode(self.current_to, token.start, token.end, token.content, token.indent) + PythonCode(self.current_to, token) elif isinstance(token, VimLCodeToken): - VimLCode(self.current_to, token.start, token.end, token.content) + VimLCode(self.current_to, token) From 7f12de5cd991d9de43541d2fd3d9dfd750cb5e78 Mon Sep 17 00:00:00 2001 From: Holger Rapp Date: Sun, 24 Jul 2011 18:46:35 +0200 Subject: [PATCH 11/12] More cleanup --- plugin/UltiSnips/Lexer.py | 39 +++++----- plugin/UltiSnips/TextObjects.py | 131 +++++++++++++++----------------- 2 files changed, 84 insertions(+), 86 deletions(-) diff --git a/plugin/UltiSnips/Lexer.py b/plugin/UltiSnips/Lexer.py index 081de6b..4595fdd 100644 --- a/plugin/UltiSnips/Lexer.py +++ b/plugin/UltiSnips/Lexer.py @@ -11,7 +11,10 @@ import re from Geometry import Position -# TODO: review this file +__all__ = [ + "tokenize", "EscapeCharToken", "TransformationToken", 
"TabStopToken", + "MirrorToken", "PythonCodeToken", "VimLCodeToken", "ShellCodeToken" +] # Helper Classes {{{ class _TextIterator(object): @@ -45,21 +48,20 @@ class _TextIterator(object): except IndexError: return None - @property - def idx(self): - return self._idx # TODO: does this need to be exposed? - @property def pos(self): return Position(self._line, self._col) @property - def exhausted(self): + def exhausted(self): # Only used in one place. Really neede? TODO return self._idx >= len(self._text) # End: Helper Classes }}} # Helper functions {{{ def _parse_number(stream): - # TODO: document me + """ + Expects the stream to contain a number next, returns the number + without consuming any more bytes + """ rv = "" while stream.peek() in string.digits: rv += stream.next() @@ -67,7 +69,12 @@ def _parse_number(stream): return int(rv) def _parse_till_closing_brace(stream): - # TODO: document me, this also eats the closing brace + """ + Returns all chars till a non-escaped } is found. Other + non escaped { are taken into account and skipped over. + + Will also consume the closing }, but not return it + """ rv = "" in_braces = 1 while True: @@ -77,25 +84,23 @@ def _parse_till_closing_brace(stream): c = stream.next() if c == '{': in_braces += 1 elif c == '}': in_braces -= 1 - if in_braces == 0: - break + if in_braces == 0: break rv += c return rv - -# TODO: the functionality of some of these functions are quite -# similar. Somekind of next_matching def _parse_till_unescaped_char(stream, char): - # TODO: document me, this also eats the closing slash + """ + Returns all chars till a non-escaped `char` is found. + + Will also consume the closing `char`, but not return it + """ rv = "" - in_braces = 1 while True: if EscapeCharToken.check(stream, char): rv += stream.next() + stream.next() else: c = stream.next() - if c == char: - break + if c == char: break rv += c return rv # End: Helper functions }}} diff --git a/plugin/UltiSnips/TextObjects.py b/plugin/UltiSnips/TextObjects.py index 054b50f..d159ea6 100644 --- a/plugin/UltiSnips/TextObjects.py +++ b/plugin/UltiSnips/TextObjects.py @@ -10,13 +10,11 @@ import vim from UltiSnips.Util import IndentUtil from UltiSnips.Buffer import TextBuffer from UltiSnips.Geometry import Span, Position +from UltiSnips.Lexer import tokenize, EscapeCharToken, TransformationToken, \ + TabStopToken, MirrorToken, PythonCodeToken, VimLCodeToken, ShellCodeToken __all__ = [ "Mirror", "Transformation", "SnippetInstance", "StartMarker" ] -from itertools import takewhile - -from debug import debug - ########################################################################### # Helper class # ########################################################################### @@ -113,6 +111,66 @@ class _CleverReplace(object): return self._unescape(tv.decode("string-escape")) +class _TOParser(object): + def __init__(self, parent_to, text, indent): + self._indent = indent + self._parent_to = parent_to + self._text = text + + def parse(self): + seen_ts = {} + all_tokens = [] + + self._do_parse(all_tokens, seen_ts) + + self._resolve_ambiguity(all_tokens, seen_ts) + self._create_objects_with_links_to_tabs(all_tokens, seen_ts) + + ##################### + # Private Functions # + ##################### + def _resolve_ambiguity(self, all_tokens, seen_ts): + for parent, token in all_tokens: + if isinstance(token, MirrorToken): + if token.no not in seen_ts: + ts = TabStop(parent, token) + seen_ts[token.no] = ts + parent._add_tabstop(token.no,ts) + else: + Mirror(parent, seen_ts[token.no], token) + 
+ def _create_objects_with_links_to_tabs(self, all_tokens, seen_ts): + for parent, token in all_tokens: + if isinstance(token, TransformationToken): + if token.no not in seen_ts: + raise RuntimeError("Tabstop %i is not known but is used by a Transformation" % t._ts) + Transformation(parent, seen_ts[token.no], token) + # TODO: check if all associations have been done properly. Also add a testcase for this! + + def _do_parse(self, all_tokens, seen_ts): + tokens = list(tokenize(self._text, self._indent)) + + for token in tokens: + all_tokens.append((self._parent_to, token)) + + if isinstance(token, TabStopToken): + ts = TabStop(self._parent_to, token) + seen_ts[token.no] = ts + self._parent_to._add_tabstop(token.no,ts) + + k = _TOParser(ts, ts.current_text, self._indent) + k._do_parse(all_tokens, seen_ts) + elif isinstance(token, EscapeCharToken): + EscapedChar(self._parent_to, token) + elif isinstance(token, ShellCodeToken): + ShellCode(self._parent_to, token) + elif isinstance(token, PythonCodeToken): + PythonCode(self._parent_to, token) + elif isinstance(token, VimLCodeToken): + VimLCode(self._parent_to, token) + + + ########################################################################### # Public classes # ########################################################################### @@ -728,68 +786,3 @@ class SnippetInstance(TextObject): return ts return self._tabstops[self._cts] - -## TODO: everything below here should be it's own module -from debug import debug # TODO: remove -from Lexer import tokenize, EscapeCharToken, TransformationToken, \ - TabStopToken, MirrorToken, PythonCodeToken, VimLCodeToken, ShellCodeToken - -class _TOParser(object): - def __init__(self, parent, text, indent): - self._indent = indent - self.current_to = parent - self.text = text - debug("text: %s" % (text)) - - def parse(self): - seen_ts = {} - tokens = [] - - self._parse(tokens, seen_ts) - - debug("all tokens: %s" % (tokens)) - debug("seen_ts: %s" % (seen_ts)) - # TODO: begin second phase: resolve ambiguity - # TODO: do this only once at the top level - for parent, token in tokens: - if isinstance(token, MirrorToken): - # TODO: maybe we can get rid of _get_tabstop and _add_tabstop - if token.no not in seen_ts: - debug("token.start: %s, token.end: %s" % (token.start, token.end)) - ts = TabStop(parent, token) - seen_ts[token.no] = ts - parent._add_tabstop(token.no,ts) - else: - Mirror(parent, seen_ts[token.no], token) - - # TODO: third phase: associate tabstops with Transformations - for parent, token in tokens: - if isinstance(token, TransformationToken): - if token.no not in seen_ts: - raise RuntimeError("Tabstop %i is not known" % t._ts) - Transformation(parent, seen_ts[token.no], token) - # TODO: check if all associations have been done properly. Also add a testcase for this! 
- - def _parse(self, all_tokens, seen_ts): - tokens = list(tokenize(self.text, self._indent)) - - debug("tokens: %s" % (tokens)) - for token in tokens: - all_tokens.append((self.current_to, token)) - - if isinstance(token, TabStopToken): - ts = TabStop(self.current_to, token) - seen_ts[token.no] = ts - self.current_to._add_tabstop(token.no,ts) - - k = _TOParser(ts, ts.current_text, self._indent) - k._parse(all_tokens, seen_ts) - elif isinstance(token, EscapeCharToken): - EscapedChar(self.current_to, token) - elif isinstance(token, ShellCodeToken): - ShellCode(self.current_to, token) - elif isinstance(token, PythonCodeToken): - PythonCode(self.current_to, token) - elif isinstance(token, VimLCodeToken): - VimLCode(self.current_to, token) - From f7c12f9d5531cf651a5e3fb7ba1041aefbb2e10c Mon Sep 17 00:00:00 2001 From: Holger Rapp Date: Sun, 24 Jul 2011 19:16:49 +0200 Subject: [PATCH 12/12] Fixed all remaining TODOs --- plugin/UltiSnips/Lexer.py | 43 +++++++++------------------------ plugin/UltiSnips/TextObjects.py | 2 -- 2 files changed, 12 insertions(+), 33 deletions(-) diff --git a/plugin/UltiSnips/Lexer.py b/plugin/UltiSnips/Lexer.py index 4595fdd..ccd2755 100644 --- a/plugin/UltiSnips/Lexer.py +++ b/plugin/UltiSnips/Lexer.py @@ -51,10 +51,6 @@ class _TextIterator(object): @property def pos(self): return Position(self._line, self._col) - - @property - def exhausted(self): # Only used in one place. Really neede? TODO - return self._idx >= len(self._text) # End: Helper Classes }}} # Helper functions {{{ def _parse_number(stream): @@ -63,7 +59,7 @@ def _parse_number(stream): without consuming any more bytes """ rv = "" - while stream.peek() in string.digits: + while stream.peek() and stream.peek() in string.digits: rv += stream.next() return int(rv) @@ -78,7 +74,7 @@ def _parse_till_closing_brace(stream): rv = "" in_braces = 1 while True: - if EscapeCharToken.check(stream, '{}'): + if EscapeCharToken.starts_here(stream, '{}'): rv += stream.next() + stream.next() else: c = stream.next() @@ -96,7 +92,7 @@ def _parse_till_unescaped_char(stream, char): """ rv = "" while True: - if EscapeCharToken.check(stream, char): + if EscapeCharToken.starts_here(stream, char): rv += stream.next() + stream.next() else: c = stream.next() @@ -117,8 +113,7 @@ class TabStopToken(Token): CHECK = re.compile(r'^\${\d+[:}]') @classmethod - def check(klass, stream): - # TODO: bad name for function + def starts_here(klass, stream): return klass.CHECK.match(stream.peek(10)) != None def _parse(self, stream, indent): @@ -140,8 +135,7 @@ class TransformationToken(Token): CHECK = re.compile(r'^\${\d+\/') @classmethod - def check(klass, stream): - # TODO: bad name for function + def starts_here(klass, stream): return klass.CHECK.match(stream.peek(10)) != None def _parse(self, stream, indent): @@ -165,17 +159,12 @@ class MirrorToken(Token): CHECK = re.compile(r'^\$\d+') @classmethod - def check(klass, stream): - # TODO: bad name for function + def starts_here(klass, stream): return klass.CHECK.match(stream.peek(10)) != None def _parse(self, stream, indent): - # TODO: why not parse number? 
- self.no = "" stream.next() # $ - while not stream.exhausted and stream.peek() in string.digits: - self.no += stream.next() - self.no = int(self.no) + self.no = _parse_number(stream) def __repr__(self): return "MirrorToken(%r,%r,%r)" % ( @@ -184,7 +173,7 @@ class MirrorToken(Token): class EscapeCharToken(Token): @classmethod - def check(klass, stream, chars = '{}\$`'): + def starts_here(klass, stream, chars = '{}\$`'): cs = stream.peek(2) if len(cs) == 2 and cs[0] == '\\' and cs[1] in chars: return True @@ -200,25 +189,23 @@ class EscapeCharToken(Token): class ShellCodeToken(Token): @classmethod - def check(klass, stream): + def starts_here(klass, stream): return stream.peek(1) == '`' def _parse(self, stream, indent): stream.next() # ` self.code = _parse_till_unescaped_char(stream, '`') - # TODO: get rid of those __repr__ maybe def __repr__(self): return "ShellCodeToken(%r,%r,%r)" % ( self.start, self.end, self.code ) -# TODO: identical to VimLCodeToken class PythonCodeToken(Token): CHECK = re.compile(r'^`!p\s') @classmethod - def check(klass, stream): + def starts_here(klass, stream): return klass.CHECK.match(stream.peek(4)) is not None def _parse(self, stream, indent): @@ -229,9 +216,6 @@ class PythonCodeToken(Token): code = _parse_till_unescaped_char(stream, '`') - # TODO: stupid to pass the indent down even if only python - # needs it. Stupid to indent beforehand. - # Strip the indent if any if len(indent): lines = code.splitlines() @@ -242,18 +226,16 @@ class PythonCodeToken(Token): self.code = code self.indent = indent - # TODO: get rid of those __repr__ maybe def __repr__(self): return "PythonCodeToken(%r,%r,%r)" % ( self.start, self.end, self.code ) - class VimLCodeToken(Token): CHECK = re.compile(r'^`!v\s') @classmethod - def check(klass, stream): + def starts_here(klass, stream): return klass.CHECK.match(stream.peek(4)) is not None def _parse(self, stream, indent): @@ -271,14 +253,13 @@ __ALLOWED_TOKENS = [ EscapeCharToken, TransformationToken, TabStopToken, MirrorToken, PythonCodeToken, VimLCodeToken, ShellCodeToken ] - def tokenize(text, indent): stream = _TextIterator(text) while True: done_something = False for t in __ALLOWED_TOKENS: - if t.check(stream): + if t.starts_here(stream): yield t(stream, indent) done_something = True break diff --git a/plugin/UltiSnips/TextObjects.py b/plugin/UltiSnips/TextObjects.py index d159ea6..05a7558 100644 --- a/plugin/UltiSnips/TextObjects.py +++ b/plugin/UltiSnips/TextObjects.py @@ -110,7 +110,6 @@ class _CleverReplace(object): return self._unescape(tv.decode("string-escape")) - class _TOParser(object): def __init__(self, parent_to, text, indent): self._indent = indent @@ -145,7 +144,6 @@ class _TOParser(object): if token.no not in seen_ts: raise RuntimeError("Tabstop %i is not known but is used by a Transformation" % t._ts) Transformation(parent, seen_ts[token.no], token) - # TODO: check if all associations have been done properly. Also add a testcase for this! def _do_parse(self, all_tokens, seen_ts): tokens = list(tokenize(self._text, self._indent))
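A quick way to sanity-check the final lexer is to run it on a snippet
definition outside of Vim. The following is a minimal sketch, not part of the
patches above: it assumes plugin/UltiSnips is on sys.path and that Geometry
(the only non-stdlib module Lexer.py pulls in) carries no Vim dependency; the
snippet string and the printed messages are purely illustrative.

    # Tokenize a snippet definition with the new lexer (Python 2, like the
    # rest of the plugin).
    from Lexer import tokenize, TabStopToken, MirrorToken

    snippet_def = "for ${1:item} in ${2:collection}:\n\t$0"

    # Plain text between tokens is skipped by tokenize(); only the
    # placeholders and the bare $0 appear in the token stream.
    for token in tokenize(snippet_def, ""):
        if isinstance(token, TabStopToken):
            print "tabstop %i with initial text %r" % (token.no, token.initial_text)
        elif isinstance(token, MirrorToken):
            # _TOParser._resolve_ambiguity later decides whether a bare $N
            # becomes a Mirror or a new TabStop ($0 has no earlier
            # definition here, so it ends up as the zero tabstop).
            print "bare reference to tabstop %i" % token.no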