Here is a small example of making a string tokenizer using regular expressions. It can be handy if you plan on creating your own language in Lua. I am playing around a bit with it, and will try to make something that can create a syntax tree from the tokens later on.
--# Main
-- Codea entry point: tokenize a sample expression and dump each token's
-- type and value to the console.
function setup()
    local result = tokenize('foo.fie.fum(1+2 >= 100.5, "hello world")')
    for _, token in ipairs(result) do
        print(token.type, token.value)
    end
end
--# Tokenize
-- Shared tokenizer state: `source` is the not-yet-consumed input,
-- `tokens` is the output list, `cursor` is the 0-based offset of the
-- start of `source` within the original string.
local source, tokens, cursor
-- Ordered list of matcher closures; earlier registrations take priority.
local patterns = {}
-- Register a matcher for `pattern` (a Lua pattern, anchored at the start
-- of the remaining input). When it matches, `createFn` (if given) builds
-- a token from the first inner capture — or the whole match when the
-- pattern has no capture of its own — and the match is consumed either
-- way. Omit `createFn` for input that should be skipped (whitespace).
local function on(pattern, createFn)
    local anchored = "^(" .. pattern .. ")"
    patterns[#patterns + 1] = function ()
        local _, len, whole, capture = string.find(source, anchored)
        if not len then return end
        if createFn then
            -- Prefer the pattern's own capture (e.g. string contents
            -- without quotes) over the full matched text.
            local token = createFn(capture or whole)
            token.from = cursor
            token.to = cursor + len
            tokens[#tokens + 1] = token
        end
        -- Consume the matched prefix and advance the absolute offset.
        source = string.sub(source, len + 1)
        cursor = cursor + len
        return true
    end
end
on("%s+")
on("[A-Za-z_]%a*", function (w) return {type="name", value=w} end)
on("%d+%.%d+", function (d) return {type="number", value=tonumber(d)} end)
on("%d+", function (d) return {type="number", value=tonumber(d)} end)
on('"([^"]*)"', function (s) return {type="string", value=s} end)
on("[=<>!+%.%-*&|/%^][=<>&|]?", function (op) return {type="operator", value=op} end)
on("[{}%(%),]", function (op) return {type="operator", value=op} end)
-- Try each registered matcher in registration order; report whether any
-- of them consumed input.
local function dispatch()
    for idx = 1, #patterns do
        local matched = patterns[idx]()
        if matched then
            return true
        end
    end
end
-- Tokenize `src` and return the list of tokens (each with type, value,
-- from, to). On unrecognizable input, prints a diagnostic and returns
-- the tokens collected so far. Global on purpose: called from the Main tab.
function tokenize(src)
    source = src
    tokens = {}
    cursor = 0
    while #source > 0 do
        if not dispatch() then break end
    end
    if #source > 0 then print("tokenizer failed at " .. source) end
    return tokens
end
(I am a little curious about building something based on this page: http://javascript.crockford.com/tdop/tdop.html)