老早就想搞一个完整的解释器,不过呢,通用语言的解释器比较庞大,不太好写。写一个 JSON 解释器比较合适,要素不多,好掌握一点,而且在忽略一些细节之后,代码量极少,对于入门者了解解释器甚至编译器到底在干什么很有帮助。
根据 https://www.json.org 的介绍,JSON 由以下结构组成:
- object: 无序的 name/value 键值对集合,以 `{` 开始,以 `}` 结束,name 后跟着 `:`,name/value 键值对之间由 `,` 分隔;
- array: 有序的值集合,以 `[` 开始,以 `]` 结束,值之间由 `,` 分隔;
- value: 可以是位于两个 `"` 之间的 string,或者是 number、true、false、null、object、array,这些结构可以嵌套;
- string: 由双引号包裹的 0 个或多个 Unicode 字符组成的序列,使用反斜杠 `\` 转义。单个字符用只含一个字符的字符串表示。字符串跟 C 或者 Java 的字符串类似;
- number: 跟 C 或 Java 的数字类似,但是不使用八进制和十六进制的格式;
- whitespace: 可以出现在任何 token 对之间;
以下是一个完整的 JSON 解释器代码:
# json_parser.py
def parse_json(json_str):
    """Parse the JSON text ``json_str`` and return the equivalent Python value.

    Objects become ``dict``, arrays become ``list``, strings become ``str``,
    ``true``/``false`` become ``bool`` and ``null`` becomes ``None``.  Every
    number, whether or not it has a fraction or exponent, is returned as a
    ``float``.

    Whitespace (space, tab, line feed, carriage return) is allowed between
    any two tokens.  Backslash escapes inside strings are decoded, including
    four-hex-digit unicode escapes and surrogate pairs.

    Args:
        json_str: The complete JSON document as a ``str``.

    Returns:
        The parsed Python value.

    Raises:
        ValueError: If the text is empty, truncated, contains an unexpected
            token, or has non-whitespace data after the top-level value.
    """
    index = 0
    length = len(json_str)

    whitespace = " \t\n\r"
    digits = "0123456789"
    hex_digits = "0123456789abcdefABCDEF"
    # Single-character escapes allowed after a backslash inside a string.
    simple_escapes = {
        '"': '"',
        "\\": "\\",
        "/": "/",
        "b": "\b",
        "f": "\f",
        "n": "\n",
        "r": "\r",
        "t": "\t",
    }

    def skip_whitespace():
        """Advance ``index`` past any insignificant whitespace."""
        nonlocal index
        while index < length and json_str[index] in whitespace:
            index += 1

    def parse_value():
        """Parse whichever value starts at the next non-whitespace character."""
        skip_whitespace()
        if index >= length:
            raise ValueError("Unexpected end of input")
        char = json_str[index]
        if char == "{":
            return parse_object()
        if char == "[":
            return parse_array()
        if char == '"':
            return parse_string()
        if char == "t":
            return parse_literal("true", True)
        if char == "f":
            return parse_literal("false", False)
        if char == "n":
            return parse_literal("null", None)
        return parse_number()

    def parse_object():
        """Parse an object; ``index`` is on the opening brace on entry."""
        nonlocal index
        obj = {}
        index += 1  # consume '{'
        skip_whitespace()
        if index < length and json_str[index] == "}":
            index += 1
            return obj
        while True:
            skip_whitespace()
            if index >= length:
                raise ValueError("Unexpected end of input")
            if json_str[index] != '"':
                raise ValueError("Expected string key, got: " + json_str[index])
            key = parse_string()
            skip_whitespace()
            if index >= length or json_str[index] != ":":
                raise ValueError("Expected ':'")
            index += 1  # consume ':'
            obj[key] = parse_value()
            skip_whitespace()
            if index >= length:
                raise ValueError("Unexpected end of input")
            if json_str[index] == ",":
                index += 1
                continue
            if json_str[index] == "}":
                index += 1
                return obj
            raise ValueError("Expected ',' or '}'")

    def parse_array():
        """Parse an array; ``index`` is on the opening bracket on entry."""
        nonlocal index
        arr = []
        index += 1  # consume '['
        skip_whitespace()
        if index < length and json_str[index] == "]":
            index += 1
            return arr
        while True:
            arr.append(parse_value())
            skip_whitespace()
            if index >= length:
                raise ValueError("Unexpected end of input")
            if json_str[index] == ",":
                index += 1
                continue
            if json_str[index] == "]":
                index += 1
                return arr
            raise ValueError("Expected ',' or ']'")

    def parse_unicode_escape():
        """Read four hex digits at ``index`` and return them as an integer."""
        nonlocal index
        quad = json_str[index:index + 4]
        if len(quad) != 4 or any(c not in hex_digits for c in quad):
            raise ValueError("Invalid unicode escape: " + quad)
        index += 4
        return int(quad, 16)

    def parse_string():
        """Parse a string; ``index`` is on the opening quote on entry."""
        nonlocal index
        index += 1  # consume opening quote
        chunks = []
        start = index  # start of the current run of unescaped characters
        while index < length and json_str[index] != '"':
            if json_str[index] != "\\":
                index += 1
                continue
            # Flush the literal run, then decode the escape sequence.
            chunks.append(json_str[start:index])
            index += 1
            if index >= length:
                raise ValueError("Unexpected end of input")
            esc = json_str[index]
            index += 1
            if esc == "u":
                code = parse_unicode_escape()
                # A high surrogate followed by an escaped low surrogate
                # encodes one code point outside the Basic Multilingual Plane.
                if 0xD800 <= code <= 0xDBFF and json_str[index:index + 2] == "\\u":
                    saved = index
                    index += 2
                    low = parse_unicode_escape()
                    if 0xDC00 <= low <= 0xDFFF:
                        code = 0x10000 + ((code - 0xD800) << 10) + (low - 0xDC00)
                    else:
                        index = saved  # not a pair; reparse it on its own
                chunks.append(chr(code))
            elif esc in simple_escapes:
                chunks.append(simple_escapes[esc])
            else:
                raise ValueError("Invalid escape character: " + esc)
            start = index
        if index >= length:
            raise ValueError("Unexpected end of input")
        chunks.append(json_str[start:index])
        index += 1  # consume closing quote
        return "".join(chunks)

    def parse_literal(text, value):
        """Consume the keyword ``text`` at ``index`` and return ``value``."""
        nonlocal index
        end = index + len(text)
        if json_str[index:end] == text:
            index = end
            return value
        raise ValueError("Unexpected token: " + json_str[index:end])

    def parse_digits():
        """Consume a run of ASCII digits and return how many were read."""
        nonlocal index
        begin = index
        while index < length and json_str[index] in digits:
            index += 1
        return index - begin

    def parse_number():
        """Parse a number (optional sign, fraction, exponent) as a float."""
        nonlocal index
        start = index
        if index < length and json_str[index] == "-":
            index += 1
        if parse_digits() == 0:
            raise ValueError("Unexpected token: " + json_str[start:start + 1])
        if index < length and json_str[index] == ".":
            index += 1
            if parse_digits() == 0:
                raise ValueError("Expected digits after '.'")
        if index < length and json_str[index] in "eE":
            index += 1
            if index < length and json_str[index] in "+-":
                index += 1
            if parse_digits() == 0:
                raise ValueError("Expected digits in exponent")
        return float(json_str[start:index])

    result = parse_value()
    skip_whitespace()
    if index < length:
        raise ValueError("Unexpected trailing data: " + json_str[index:index + 10])
    return result
# Demo: parse a sample document, then print the whole result followed by
# two values looked up inside the nested array.
s = (
    '{"name":"hello","int":1,"string":"True",'
    '"arr":["a","你好",1.1,true,null,{"value":"haha"}],'
    '"bool":true}'
)
res = parse_json(s)
print(res, res['arr'][1], res['arr'][5]['value'], sep="\n")
运行结果如下:
$ python json_parser.py
{'name': 'hello', 'int': 1.0, 'string': 'True', 'arr': ['a', '你好', 1.1, True, None, {'value': 'haha'}], 'bool': True}
你好
haha