[go: up one dir, main page]

Menu

[4d509f]: / roundup / token.py  Maximize  Restore  History

Download this file

123 lines (119 with data), 4.2 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
#
# Copyright (c) 2001 Richard Jones, richard@bofh.asn.au.
# This module is free software, and you may redistribute it and/or modify
# under the same terms as Python, so long as this copyright message and
# disclaimer are retained in their original form.
#
# This module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# $Id: token.py,v 1.2 2002-01-02 02:31:38 richard Exp $
#
__doc__ = """
This module provides the tokeniser used by roundup-admin.
"""


def token_split(s, whitespace=' \r\n\t', quotes='\'"',
                escaped={'r': '\r', 'n': '\n', 't': '\t'}):
    r'''Split the string ``s`` into a list of tokens.

    An occurrence of a ' or " in the input causes the splitter to
    ignore whitespace until a matching quote char is found.  Embedded
    non-matching quote chars are kept.  Whitespace and quoting
    characters may be escaped using a backslash: \r, \n and \t are
    converted to carriage-return, newline and tab; any other
    backslashed character is kept along with its backslash.

    Valid:
        hello world        (2 tokens: hello, world)
        "hello world"      (1 token: hello world)
        "Roch'e" Compaan   (2 tokens: Roch'e, Compaan)
        Roch\'e Compaan    (2 tokens: Roch'e, Compaan)
        address="1 2 3"    (1 token: address=1 2 3)
        \\                 (1 token: \)
        \n                 (1 token: a newline)
        \o                 (1 token: \o)
    Invalid:
        "hello world       (no matching quote)
        Roch'e Compaan     (no matching quote)

    Arguments:
        s          -- the string to tokenise
        whitespace -- characters that terminate an unquoted token
        quotes     -- characters that open/close a quoted section
        escaped    -- backslash-escape translations (read-only; the
                      mutable default is never modified)

    Returns a list of token strings.  Raises ValueError if a quote is
    left unmatched at end of input.
    '''
    tokens = []
    pos = 0
    # tokeniser states
    NEWTOKEN = 'newtoken'   # between tokens, skipping whitespace
    TOKEN = 'token'         # accumulating an unquoted token
    QUOTE = 'quote'         # inside a quoted section
    ESCAPE = 'escape'       # just consumed a backslash
    quotechar = ''
    state = NEWTOKEN
    oldstate = ''           # one-level state stack for ESCAPE ;)
    length = len(s)
    token = ''
    while True:
        # end of input: flush whatever we have accumulated
        if pos == length:
            if state == QUOTE:
                raise ValueError("unmatched quote")
            elif state == TOKEN:
                tokens.append(token)
            elif state == ESCAPE and token:
                # dangling trailing backslash: keep the partial token
                # instead of silently discarding it
                tokens.append(token)
            break
        c = s[pos]
        if state == NEWTOKEN:
            # looking for the start of a new token
            if c in quotes:
                # quoted token
                state = QUOTE
                quotechar = c
                pos += 1
                continue
            elif c in whitespace:
                # skip leading whitespace
                pos += 1
                continue
            elif c == '\\':
                pos += 1
                oldstate = TOKEN
                state = ESCAPE
                continue
            # any other char starts an unquoted token; fall through
            # to the append at the bottom of the loop
            state = TOKEN
        elif state == TOKEN:
            if c in whitespace:
                # token terminated by whitespace
                tokens.append(token)
                pos += 1
                state = NEWTOKEN
                token = ''
                continue
            elif c in quotes:
                # embedded quoted section, e.g. address="1 2 3"
                state = QUOTE
                quotechar = c
                pos += 1
                continue
            elif c == '\\':
                pos += 1
                oldstate = state
                state = ESCAPE
                continue
        elif state == QUOTE and c == quotechar:
            # found the matching close quote; back to
            # whitespace-terminated accumulation
            pos += 1
            state = TOKEN
            continue
        elif state == ESCAPE:
            # escaped-char conversions (t, r, n)
            # TODO: octal, hexdigit
            state = oldstate
            if c in escaped:
                c = escaped[c]
            elif c != '\\' and c not in whitespace and c not in quotes:
                # unknown escape such as \o: keep the backslash,
                # as the docstring promises (\o -> \o)
                c = '\\' + c
        # just add this (possibly translated) char to the token and
        # move along
        token += c
        pos += 1
    return tokens
#
# $Log: not supported by cvs2svn $
# Revision 1.1 2001/12/31 05:09:20 richard
# Added better tokenising to roundup-admin - handles spaces and stuff. Can
# use quoting or backslashes. See the roundup.token pydoc.
#
#
#
# vim: set filetype=python ts=4 sw=4 et si