My favorites | Sign in
Project Home
Checkout   Browse   Changes  
Changes to /trunk/src/mochijson2.erl
r131 vs. r132 Compare: vs.  Format:
Revision r132
Go to: 
Project members, sign in to write a code review
/trunk/src/mochijson2.erl   r131 /trunk/src/mochijson2.erl   r132
1 %% @author Bob Ippolito <bob@mochimedia.com> 1 %% @author Bob Ippolito <bob@mochimedia.com>
2 %% @copyright 2007 Mochi Media, Inc. 2 %% @copyright 2007 Mochi Media, Inc.
3 3
4 %% @doc Yet another JSON (RFC 4627) library for Erlang. mochijson2 works 4 %% @doc Yet another JSON (RFC 4627) library for Erlang. mochijson2 works
5 %% with binaries as strings, arrays as lists (without an {array, _}) 5 %% with binaries as strings, arrays as lists (without an {array, _})
6 %% wrapper and it only knows how to decode UTF-8 (and ASCII). 6 %% wrapper and it only knows how to decode UTF-8 (and ASCII).
7 7
8 -module(mochijson2). 8 -module(mochijson2).
9 -author('bob@mochimedia.com'). 9 -author('bob@mochimedia.com').
10 -export([encoder/1, encode/1]). 10 -export([encoder/1, encode/1]).
11 -export([decoder/1, decode/1]). 11 -export([decoder/1, decode/1]).
12 12
13 % This is a macro to placate syntax highlighters. 13 % This is a macro to placate syntax highlighters.
14 -define(Q, $\"). 14 -define(Q, $\").
15 -define(ADV_COL(S, N), S#decoder{offset=N+S#decoder.offset, 15 -define(ADV_COL(S, N), S#decoder{offset=N+S#decoder.offset,
16 column=N+S#decoder.column}). 16 column=N+S#decoder.column}).
17 -define(INC_COL(S), S#decoder{offset=1+S#decoder.offset, 17 -define(INC_COL(S), S#decoder{offset=1+S#decoder.offset,
18 column=1+S#decoder.column}). 18 column=1+S#decoder.column}).
19 -define(INC_LINE(S), S#decoder{offset=1+S#decoder.offset, 19 -define(INC_LINE(S), S#decoder{offset=1+S#decoder.offset,
20 column=1, 20 column=1,
21 line=1+S#decoder.line}). 21 line=1+S#decoder.line}).
22 -define(INC_CHAR(S, C), 22 -define(INC_CHAR(S, C),
23 case C of 23 case C of
24 $\n -> 24 $\n ->
25 S#decoder{column=1, 25 S#decoder{column=1,
26 line=1+S#decoder.line, 26 line=1+S#decoder.line,
27 offset=1+S#decoder.offset}; 27 offset=1+S#decoder.offset};
28 _ -> 28 _ ->
29 S#decoder{column=1+S#decoder.column, 29 S#decoder{column=1+S#decoder.column,
30 offset=1+S#decoder.offset} 30 offset=1+S#decoder.offset}
31 end). 31 end).
32 -define(IS_WHITESPACE(C), 32 -define(IS_WHITESPACE(C),
33 (C =:= $\s orelse C =:= $\t orelse C =:= $\r orelse C =:= $\n)). 33 (C =:= $\s orelse C =:= $\t orelse C =:= $\r orelse C =:= $\n)).
34 34
35 %% @type iolist() = [char() | binary() | iolist()] 35 %% @type iolist() = [char() | binary() | iolist()]
36 %% @type iodata() = iolist() | binary() 36 %% @type iodata() = iolist() | binary()
37 %% @type json_string() = atom() | binary() 37 %% @type json_string() = atom() | binary()
38 %% @type json_number() = integer() | float() 38 %% @type json_number() = integer() | float()
39 %% @type json_array() = [json_term()] 39 %% @type json_array() = [json_term()]
40 %% @type json_object() = {struct, [{json_string(), json_term()}]} 40 %% @type json_object() = {struct, [{json_string(), json_term()}]}
41 %% @type json_iolist() = {json, iolist()} 41 %% @type json_iolist() = {json, iolist()}
42 %% @type json_term() = json_string() | json_number() | json_array() | 42 %% @type json_term() = json_string() | json_number() | json_array() |
43 %% json_object() | json_iolist() 43 %% json_object() | json_iolist()
44 44
45 -record(encoder, {handler=null, 45 -record(encoder, {handler=null,
46 utf8=false}). 46 utf8=false}).
47 47
48 -record(decoder, {object_hook=null, 48 -record(decoder, {object_hook=null,
49 offset=0, 49 offset=0,
50 line=1, 50 line=1,
51 column=1, 51 column=1,
52 state=null}). 52 state=null}).
53 53
54 %% @spec encoder([encoder_option()]) -> function() 54 %% @spec encoder([encoder_option()]) -> function()
55 %% @doc Create an encoder/1 with the given options. 55 %% @doc Create an encoder/1 with the given options.
56 %% @type encoder_option() = handler_option() | utf8_option() 56 %% @type encoder_option() = handler_option() | utf8_option()
57 %% @type utf8_option() = boolean(). Emit unicode as utf8 (default - false) 57 %% @type utf8_option() = boolean(). Emit unicode as utf8 (default - false)
58 encoder(Options) -> 58 encoder(Options) ->
59 State = parse_encoder_options(Options, #encoder{}), 59 State = parse_encoder_options(Options, #encoder{}),
60 fun (O) -> json_encode(O, State) end. 60 fun (O) -> json_encode(O, State) end.
61 61
62 %% @spec encode(json_term()) -> iolist() 62 %% @spec encode(json_term()) -> iolist()
63 %% @doc Encode the given term as JSON to an iolist. 63 %% @doc Encode the given term as JSON to an iolist.
64 encode(Any) -> 64 encode(Any) ->
65 json_encode(Any, #encoder{}). 65 json_encode(Any, #encoder{}).
66 66
67 %% @spec decoder([decoder_option()]) -> function() 67 %% @spec decoder([decoder_option()]) -> function()
68 %% @doc Create a decoder/1 with the given options. 68 %% @doc Create a decoder/1 with the given options.
69 decoder(Options) -> 69 decoder(Options) ->
70 State = parse_decoder_options(Options, #decoder{}), 70 State = parse_decoder_options(Options, #decoder{}),
71 fun (O) -> json_decode(O, State) end. 71 fun (O) -> json_decode(O, State) end.
72 72
73 %% @spec decode(iolist()) -> json_term() 73 %% @spec decode(iolist()) -> json_term()
74 %% @doc Decode the given iolist to Erlang terms. 74 %% @doc Decode the given iolist to Erlang terms.
75 decode(S) -> 75 decode(S) ->
76 json_decode(S, #decoder{}). 76 json_decode(S, #decoder{}).
77 77
78 %% Internal API 78 %% Internal API
79 79
80 parse_encoder_options([], State) -> 80 parse_encoder_options([], State) ->
81 State; 81 State;
82 parse_encoder_options([{handler, Handler} | Rest], State) -> 82 parse_encoder_options([{handler, Handler} | Rest], State) ->
83 parse_encoder_options(Rest, State#encoder{handler=Handler}); 83 parse_encoder_options(Rest, State#encoder{handler=Handler});
84 parse_encoder_options([{utf8, Switch} | Rest], State) -> 84 parse_encoder_options([{utf8, Switch} | Rest], State) ->
85 parse_encoder_options(Rest, State#encoder{utf8=Switch}). 85 parse_encoder_options(Rest, State#encoder{utf8=Switch}).
86 86
87 parse_decoder_options([], State) -> 87 parse_decoder_options([], State) ->
88 State; 88 State;
89 parse_decoder_options([{object_hook, Hook} | Rest], State) -> 89 parse_decoder_options([{object_hook, Hook} | Rest], State) ->
90 parse_decoder_options(Rest, State#decoder{object_hook=Hook}). 90 parse_decoder_options(Rest, State#decoder{object_hook=Hook}).
91 91
92 json_encode(true, _State) -> 92 json_encode(true, _State) ->
93 <<"true">>; 93 <<"true">>;
94 json_encode(false, _State) -> 94 json_encode(false, _State) ->
95 <<"false">>; 95 <<"false">>;
96 json_encode(null, _State) -> 96 json_encode(null, _State) ->
97 <<"null">>; 97 <<"null">>;
98 json_encode(I, _State) when is_integer(I) andalso I >= -2147483648 andalso I =< 2147483647 -> 98 json_encode(I, _State) when is_integer(I) andalso I >= -2147483648 andalso I =< 2147483647 ->
99 %% Anything outside of 32-bit integers should be encoded as a float 99 %% Anything outside of 32-bit integers should be encoded as a float
100 integer_to_list(I); 100 integer_to_list(I);
101 json_encode(I, _State) when is_integer(I) -> 101 json_encode(I, _State) when is_integer(I) ->
102 mochinum:digits(float(I)); 102 mochinum:digits(float(I));
103 json_encode(F, _State) when is_float(F) -> 103 json_encode(F, _State) when is_float(F) ->
104 mochinum:digits(F); 104 mochinum:digits(F);
105 json_encode(S, State) when is_binary(S); is_atom(S) -> 105 json_encode(S, State) when is_binary(S); is_atom(S) ->
106 json_encode_string(S, State); 106 json_encode_string(S, State);
107 json_encode(Array, State) when is_list(Array) -> 107 json_encode(Array, State) when is_list(Array) ->
108 json_encode_array(Array, State); 108 json_encode_array(Array, State);
109 json_encode({struct, Props}, State) when is_list(Props) -> 109 json_encode({struct, Props}, State) when is_list(Props) ->
110 json_encode_proplist(Props, State); 110 json_encode_proplist(Props, State);
111 json_encode({json, IoList}, _State) -> 111 json_encode({json, IoList}, _State) ->
112 IoList; 112 IoList;
113 json_encode(Bad, #encoder{handler=null}) -> 113 json_encode(Bad, #encoder{handler=null}) ->
114 exit({json_encode, {bad_term, Bad}}); 114 exit({json_encode, {bad_term, Bad}});
115 json_encode(Bad, State=#encoder{handler=Handler}) -> 115 json_encode(Bad, State=#encoder{handler=Handler}) ->
116 json_encode(Handler(Bad), State). 116 json_encode(Handler(Bad), State).
117 117
118 json_encode_array([], _State) -> 118 json_encode_array([], _State) ->
119 <<"[]">>; 119 <<"[]">>;
120 json_encode_array(L, State) -> 120 json_encode_array(L, State) ->
121 F = fun (O, Acc) -> 121 F = fun (O, Acc) ->
122 [$,, json_encode(O, State) | Acc] 122 [$,, json_encode(O, State) | Acc]
123 end, 123 end,
124 [$, | Acc1] = lists:foldl(F, "[", L), 124 [$, | Acc1] = lists:foldl(F, "[", L),
125 lists:reverse([$\] | Acc1]). 125 lists:reverse([$\] | Acc1]).
126 126
127 json_encode_proplist([], _State) -> 127 json_encode_proplist([], _State) ->
128 <<"{}">>; 128 <<"{}">>;
129 json_encode_proplist(Props, State) -> 129 json_encode_proplist(Props, State) ->
130 F = fun ({K, V}, Acc) -> 130 F = fun ({K, V}, Acc) ->
131 KS = json_encode_string(K, State), 131 KS = json_encode_string(K, State),
132 VS = json_encode(V, State), 132 VS = json_encode(V, State),
133 [$,, VS, $:, KS | Acc] 133 [$,, VS, $:, KS | Acc]
134 end, 134 end,
135 [$, | Acc1] = lists:foldl(F, "{", Props), 135 [$, | Acc1] = lists:foldl(F, "{", Props),
136 lists:reverse([$\} | Acc1]). 136 lists:reverse([$\} | Acc1]).
137 137
138 json_encode_string(A, State) when is_atom(A) -> 138 json_encode_string(A, State) when is_atom(A) ->
139 L = atom_to_list(A), 139 L = atom_to_list(A),
140 case json_string_is_safe(L) of 140 case json_string_is_safe(L) of
141 true -> 141 true ->
142 [?Q, L, ?Q]; 142 [?Q, L, ?Q];
143 false -> 143 false ->
144 json_encode_string_unicode(xmerl_ucs:from_utf8(L), State, [?Q]) 144 json_encode_string_unicode(xmerl_ucs:from_utf8(L), State, [?Q])
145 end; 145 end;
146 json_encode_string(B, State) when is_binary(B) -> 146 json_encode_string(B, State) when is_binary(B) ->
147 case json_bin_is_safe(B) of 147 case json_bin_is_safe(B) of
148 true -> 148 true ->
149 [?Q, B, ?Q]; 149 [?Q, B, ?Q];
150 false -> 150 false ->
151 json_encode_string_unicode(xmerl_ucs:from_utf8(B), State, [?Q]) 151 json_encode_string_unicode(xmerl_ucs:from_utf8(B), State, [?Q])
152 end; 152 end;
153 json_encode_string(I, _State) when is_integer(I) -> 153 json_encode_string(I, _State) when is_integer(I) ->
154 [?Q, integer_to_list(I), ?Q]; 154 [?Q, integer_to_list(I), ?Q];
155 json_encode_string(L, State) when is_list(L) -> 155 json_encode_string(L, State) when is_list(L) ->
156 case json_string_is_safe(L) of 156 case json_string_is_safe(L) of
157 true -> 157 true ->
158 [?Q, L, ?Q]; 158 [?Q, L, ?Q];
159 false -> 159 false ->
160 json_encode_string_unicode(L, State, [?Q]) 160 json_encode_string_unicode(L, State, [?Q])
161 end. 161 end.
162 162
163 json_string_is_safe([]) -> 163 json_string_is_safe([]) ->
164 true; 164 true;
165 json_string_is_safe([C | Rest]) -> 165 json_string_is_safe([C | Rest]) ->
166 case C of 166 case C of
167 ?Q -> 167 ?Q ->
168 false; 168 false;
169 $\\ -> 169 $\\ ->
170 false; 170 false;
171 $\b -> 171 $\b ->
172 false; 172 false;
173 $\f -> 173 $\f ->
174 false; 174 false;
175 $\n -> 175 $\n ->
176 false; 176 false;
177 $\r -> 177 $\r ->
178 false; 178 false;
179 $\t -> 179 $\t ->
180 false; 180 false;
181 C when C >= 0, C < $\s; C >= 16#7f, C =< 16#10FFFF -> 181 C when C >= 0, C < $\s; C >= 16#7f, C =< 16#10FFFF ->
182 false; 182 false;
183 C when C < 16#7f -> 183 C when C < 16#7f ->
184 json_string_is_safe(Rest); 184 json_string_is_safe(Rest);
185 _ -> 185 _ ->
186 false 186 false
187 end. 187 end.
188 188
189 json_bin_is_safe(<<>>) -> 189 json_bin_is_safe(<<>>) ->
190 true; 190 true;
191 json_bin_is_safe(<<C, Rest/binary>>) -> 191 json_bin_is_safe(<<C, Rest/binary>>) ->
192 case C of 192 case C of
193 ?Q -> 193 ?Q ->
194 false; 194 false;
195 $\\ -> 195 $\\ ->
196 false; 196 false;
197 $\b -> 197 $\b ->
198 false; 198 false;
199 $\f -> 199 $\f ->
200 false; 200 false;
201 $\n -> 201 $\n ->
202 false; 202 false;
203 $\r -> 203 $\r ->
204 false; 204 false;
205 $\t -> 205 $\t ->
206 false; 206 false;
207 C when C >= 0, C < $\s; C >= 16#7f, C =< 16#10FFFF -> 207 C when C >= 0, C < $\s; C >= 16#7f ->
208 false; 208 false;
209 C when C < 16#7f -> 209 C when C < 16#7f ->
210 json_bin_is_safe(Rest); 210 json_bin_is_safe(Rest)
211 _ ->
212 false
213 end. 211 end.
214 212
215 json_encode_string_unicode([], _State, Acc) -> 213 json_encode_string_unicode([], _State, Acc) ->
216 lists:reverse([$\" | Acc]); 214 lists:reverse([$\" | Acc]);
217 json_encode_string_unicode([C | Cs], State, Acc) -> 215 json_encode_string_unicode([C | Cs], State, Acc) ->
218 Acc1 = case C of 216 Acc1 = case C of
219 ?Q -> 217 ?Q ->
220 [?Q, $\\ | Acc]; 218 [?Q, $\\ | Acc];
221 %% Escaping solidus is only useful when trying to protect 219 %% Escaping solidus is only useful when trying to protect
222 %% against "</script>" injection attacks which are only 220 %% against "</script>" injection attacks which are only
223 %% possible when JSON is inserted into an HTML document 221 %% possible when JSON is inserted into an HTML document
224 %% in-line. mochijson2 does not protect you from this, so 222 %% in-line. mochijson2 does not protect you from this, so
225 %% if you do insert directly into HTML then you need to 223 %% if you do insert directly into HTML then you need to
226 %% uncomment the following case or escape the output of encode. 224 %% uncomment the following case or escape the output of encode.
227 %% 225 %%
228 %% $/ -> 226 %% $/ ->
229 %% [$/, $\\ | Acc]; 227 %% [$/, $\\ | Acc];
230 %% 228 %%
231 $\\ -> 229 $\\ ->
232 [$\\, $\\ | Acc]; 230 [$\\, $\\ | Acc];
233 $\b -> 231 $\b ->
234 [$b, $\\ | Acc]; 232 [$b, $\\ | Acc];
235 $\f -> 233 $\f ->
236 [$f, $\\ | Acc]; 234 [$f, $\\ | Acc];
237 $\n -> 235 $\n ->
238 [$n, $\\ | Acc]; 236 [$n, $\\ | Acc];
239 $\r -> 237 $\r ->
240 [$r, $\\ | Acc]; 238 [$r, $\\ | Acc];
241 $\t -> 239 $\t ->
242 [$t, $\\ | Acc]; 240 [$t, $\\ | Acc];
243 C when C >= 0, C < $\s -> 241 C when C >= 0, C < $\s ->
244 [unihex(C) | Acc]; 242 [unihex(C) | Acc];
245 C when C >= 16#7f, C =< 16#10FFFF, State#encoder.utf8 -> 243 C when C >= 16#7f, C =< 16#10FFFF, State#encoder.utf8 ->
246 [xmerl_ucs:to_utf8(C) | Acc]; 244 [xmerl_ucs:to_utf8(C) | Acc];
247 C when C >= 16#7f, C =< 16#10FFFF, not State#encoder.utf8 -> 245 C when C >= 16#7f, C =< 16#10FFFF, not State#encoder.utf8 ->
248 [unihex(C) | Acc]; 246 [unihex(C) | Acc];
249 C when C < 16#7f -> 247 C when C < 16#7f ->
250 [C | Acc]; 248 [C | Acc];
251 _ -> 249 _ ->
252 exit({json_encode, {bad_char, C}}) 250 exit({json_encode, {bad_char, C}})
253 end, 251 end,
254 json_encode_string_unicode(Cs, State, Acc1). 252 json_encode_string_unicode(Cs, State, Acc1).
255 253
256 hexdigit(C) when C >= 0, C =< 9 -> 254 hexdigit(C) when C >= 0, C =< 9 ->
257 C + $0; 255 C + $0;
258 hexdigit(C) when C =< 15 -> 256 hexdigit(C) when C =< 15 ->
259 C + $a - 10. 257 C + $a - 10.
260 258
261 unihex(C) when C < 16#10000 -> 259 unihex(C) when C < 16#10000 ->
262 <<D3:4, D2:4, D1:4, D0:4>> = <<C:16>>, 260 <<D3:4, D2:4, D1:4, D0:4>> = <<C:16>>,
263 Digits = [hexdigit(D) || D <- [D3, D2, D1, D0]], 261 Digits = [hexdigit(D) || D <- [D3, D2, D1, D0]],
264 [$\\, $u | Digits]; 262 [$\\, $u | Digits];
265 unihex(C) when C =< 16#10FFFF -> 263 unihex(C) when C =< 16#10FFFF ->
266 N = C - 16#10000, 264 N = C - 16#10000,
267 S1 = 16#d800 bor ((N bsr 10) band 16#3ff), 265 S1 = 16#d800 bor ((N bsr 10) band 16#3ff),
268 S2 = 16#dc00 bor (N band 16#3ff), 266 S2 = 16#dc00 bor (N band 16#3ff),
269 [unihex(S1), unihex(S2)]. 267 [unihex(S1), unihex(S2)].
270 268
271 json_decode(L, S) when is_list(L) -> 269 json_decode(L, S) when is_list(L) ->
272 json_decode(iolist_to_binary(L), S); 270 json_decode(iolist_to_binary(L), S);
273 json_decode(B, S) -> 271 json_decode(B, S) ->
274 {Res, S1} = decode1(B, S), 272 {Res, S1} = decode1(B, S),
275 {eof, _} = tokenize(B, S1#decoder{state=trim}), 273 {eof, _} = tokenize(B, S1#decoder{state=trim}),
276 Res. 274 Res.
277 275
278 decode1(B, S=#decoder{state=null}) -> 276 decode1(B, S=#decoder{state=null}) ->
279 case tokenize(B, S#decoder{state=any}) of 277 case tokenize(B, S#decoder{state=any}) of
280 {{const, C}, S1} -> 278 {{const, C}, S1} ->
281 {C, S1}; 279 {C, S1};
282 {start_array, S1} -> 280 {start_array, S1} ->
283 decode_array(B, S1); 281 decode_array(B, S1);
284 {start_object, S1} -> 282 {start_object, S1} ->
285 decode_object(B, S1) 283 decode_object(B, S1)
286 end. 284 end.
287 285
288 make_object(V, #decoder{object_hook=null}) -> 286 make_object(V, #decoder{object_hook=null}) ->
289 V; 287 V;
290 make_object(V, #decoder{object_hook=Hook}) -> 288 make_object(V, #decoder{object_hook=Hook}) ->
291 Hook(V). 289 Hook(V).
292 290
293 decode_object(B, S) -> 291 decode_object(B, S) ->
294 decode_object(B, S#decoder{state=key}, []). 292 decode_object(B, S#decoder{state=key}, []).
295 293
296 decode_object(B, S=#decoder{state=key}, Acc) -> 294 decode_object(B, S=#decoder{state=key}, Acc) ->
297 case tokenize(B, S) of 295 case tokenize(B, S) of
298 {end_object, S1} -> 296 {end_object, S1} ->
299 V = make_object({struct, lists:reverse(Acc)}, S1), 297 V = make_object({struct, lists:reverse(Acc)}, S1),
300 {V, S1#decoder{state=null}}; 298 {V, S1#decoder{state=null}};
301 {{const, K}, S1} -> 299 {{const, K}, S1} ->
302 {colon, S2} = tokenize(B, S1), 300 {colon, S2} = tokenize(B, S1),
303 {V, S3} = decode1(B, S2#decoder{state=null}), 301 {V, S3} = decode1(B, S2#decoder{state=null}),
304 decode_object(B, S3#decoder{state=comma}, [{K, V} | Acc]) 302 decode_object(B, S3#decoder{state=comma}, [{K, V} | Acc])
305 end; 303 end;
306 decode_object(B, S=#decoder{state=comma}, Acc) -> 304 decode_object(B, S=#decoder{state=comma}, Acc) ->
307 case tokenize(B, S) of 305 case tokenize(B, S) of
308 {end_object, S1} -> 306 {end_object, S1} ->
309 V = make_object({struct, lists:reverse(Acc)}, S1), 307 V = make_object({struct, lists:reverse(Acc)}, S1),
310 {V, S1#decoder{state=null}}; 308 {V, S1#decoder{state=null}};
311 {comma, S1} -> 309 {comma, S1} ->
312 decode_object(B, S1#decoder{state=key}, Acc) 310 decode_object(B, S1#decoder{state=key}, Acc)
313 end. 311 end.
314 312
315 decode_array(B, S) -> 313 decode_array(B, S) ->
316 decode_array(B, S#decoder{state=any}, []). 314 decode_array(B, S#decoder{state=any}, []).
317 315
318 decode_array(B, S=#decoder{state=any}, Acc) -> 316 decode_array(B, S=#decoder{state=any}, Acc) ->
319 case tokenize(B, S) of 317 case tokenize(B, S) of
320 {end_array, S1} -> 318 {end_array, S1} ->
321 {lists:reverse(Acc), S1#decoder{state=null}}; 319 {lists:reverse(Acc), S1#decoder{state=null}};
322 {start_array, S1} -> 320 {start_array, S1} ->
323 {Array, S2} = decode_array(B, S1), 321 {Array, S2} = decode_array(B, S1),
324 decode_array(B, S2#decoder{state=comma}, [Array | Acc]); 322 decode_array(B, S2#decoder{state=comma}, [Array | Acc]);
325 {start_object, S1} -> 323 {start_object, S1} ->
326 {Array, S2} = decode_object(B, S1), 324 {Array, S2} = decode_object(B, S1),
327 decode_array(B, S2#decoder{state=comma}, [Array | Acc]); 325 decode_array(B, S2#decoder{state=comma}, [Array | Acc]);
328 {{const, Const}, S1} -> 326 {{const, Const}, S1} ->
329 decode_array(B, S1#decoder{state=comma}, [Const | Acc]) 327 decode_array(B, S1#decoder{state=comma}, [Const | Acc])
330 end; 328 end;
331 decode_array(B, S=#decoder{state=comma}, Acc) -> 329 decode_array(B, S=#decoder{state=comma}, Acc) ->
332 case tokenize(B, S) of 330 case tokenize(B, S) of
333 {end_array, S1} -> 331 {end_array, S1} ->
334 {lists:reverse(Acc), S1#decoder{state=null}}; 332 {lists:reverse(Acc), S1#decoder{state=null}};
335 {comma, S1} -> 333 {comma, S1} ->
336 decode_array(B, S1#decoder{state=any}, Acc) 334 decode_array(B, S1#decoder{state=any}, Acc)
337 end. 335 end.
338 336
339 tokenize_string(B, S=#decoder{offset=O}) -> 337 tokenize_string(B, S=#decoder{offset=O}) ->
340 case tokenize_string_fast(B, O) of 338 case tokenize_string_fast(B, O) of
341 {escape, O1} -> 339 {escape, O1} ->
342 Length = O1 - O, 340 Length = O1 - O,
343 S1 = ?ADV_COL(S, Length), 341 S1 = ?ADV_COL(S, Length),
344 <<_:O/binary, Head:Length/binary, _/binary>> = B, 342 <<_:O/binary, Head:Length/binary, _/binary>> = B,
345 tokenize_string(B, S1, lists:reverse(binary_to_list(Head))); 343 tokenize_string(B, S1, lists:reverse(binary_to_list(Head)));
346 O1 -> 344 O1 ->
347 Length = O1 - O, 345 Length = O1 - O,
348 <<_:O/binary, String:Length/binary, ?Q, _/binary>> = B, 346 <<_:O/binary, String:Length/binary, ?Q, _/binary>> = B,
349 {{const, String}, ?ADV_COL(S, Length + 1)} 347 {{const, String}, ?ADV_COL(S, Length + 1)}
350 end. 348 end.
351 349
352 tokenize_string_fast(B, O) -> 350 tokenize_string_fast(B, O) ->
353 case B of 351 case B of
354 <<_:O/binary, ?Q, _/binary>> -> 352 <<_:O/binary, ?Q, _/binary>> ->
355 O; 353 O;
356 <<_:O/binary, $\\, _/binary>> -> 354 <<_:O/binary, $\\, _/binary>> ->
357 {escape, O}; 355 {escape, O};
358 <<_:O/binary, C1, _/binary>> when C1 < 128 -> 356 <<_:O/binary, C1, _/binary>> when C1 < 128 ->
359 tokenize_string_fast(B, 1 + O); 357 tokenize_string_fast(B, 1 + O);
360 <<_:O/binary, C1, C2, _/binary>> when C1 >= 194, C1 =< 223, 358 <<_:O/binary, C1, C2, _/binary>> when C1 >= 194, C1 =< 223,
361 C2 >= 128, C2 =< 191 -> 359 C2 >= 128, C2 =< 191 ->
362 tokenize_string_fast(B, 2 + O); 360 tokenize_string_fast(B, 2 + O);
363 <<_:O/binary, C1, C2, C3, _/binary>> when C1 >= 224, C1 =< 239, 361 <<_:O/binary, C1, C2, C3, _/binary>> when C1 >= 224, C1 =< 239,
364 C2 >= 128, C2 =< 191, 362 C2 >= 128, C2 =< 191,
365 C3 >= 128, C3 =< 191 -> 363 C3 >= 128, C3 =< 191 ->
366 tokenize_string_fast(B, 3 + O); 364 tokenize_string_fast(B, 3 + O);
367 <<_:O/binary, C1, C2, C3, C4, _/binary>> when C1 >= 240, C1 =< 244, 365 <<_:O/binary, C1, C2, C3, C4, _/binary>> when C1 >= 240, C1 =< 244,
368 C2 >= 128, C2 =< 191, 366 C2 >= 128, C2 =< 191,
369 C3 >= 128, C3 =< 191, 367 C3 >= 128, C3 =< 191,
370 C4 >= 128, C4 =< 191 -> 368 C4 >= 128, C4 =< 191 ->
371 tokenize_string_fast(B, 4 + O); 369 tokenize_string_fast(B, 4 + O);
372 _ -> 370 _ ->
373 throw(invalid_utf8) 371 throw(invalid_utf8)
374 end. 372 end.
375 373
376 tokenize_string(B, S=#decoder{offset=O}, Acc) -> 374 tokenize_string(B, S=#decoder{offset=O}, Acc) ->
377 case B of 375 case B of
378 <<_:O/binary, ?Q, _/binary>> -> 376 <<_:O/binary, ?Q, _/binary>> ->
379 {{const, iolist_to_binary(lists:reverse(Acc))}, ?INC_COL(S)}; 377 {{const, iolist_to_binary(lists:reverse(Acc))}, ?INC_COL(S)};
380 <<_:O/binary, "\\\"", _/binary>> -> 378 <<_:O/binary, "\\\"", _/binary>> ->
381 tokenize_string(B, ?ADV_COL(S, 2), [$\" | Acc]); 379 tokenize_string(B, ?ADV_COL(S, 2), [$\" | Acc]);
382 <<_:O/binary, "\\\\", _/binary>> -> 380 <<_:O/binary, "\\\\", _/binary>> ->
383 tokenize_string(B, ?ADV_COL(S, 2), [$\\ | Acc]); 381 tokenize_string(B, ?ADV_COL(S, 2), [$\\ | Acc]);
384 <<_:O/binary, "\\/", _/binary>> -> 382 <<_:O/binary, "\\/", _/binary>> ->
385 tokenize_string(B, ?ADV_COL(S, 2), [$/ | Acc]); 383 tokenize_string(B, ?ADV_COL(S, 2), [$/ | Acc]);
386 <<_:O/binary, "\\b", _/binary>> -> 384 <<_:O/binary, "\\b", _/binary>> ->
387 tokenize_string(B, ?ADV_COL(S, 2), [$\b | Acc]); 385 tokenize_string(B, ?ADV_COL(S, 2), [$\b | Acc]);
388 <<_:O/binary, "\\f", _/binary>> -> 386 <<_:O/binary, "\\f", _/binary>> ->
389 tokenize_string(B, ?ADV_COL(S, 2), [$\f | Acc]); 387 tokenize_string(B, ?ADV_COL(S, 2), [$\f | Acc]);
390 <<_:O/binary, "\\n", _/binary>> -> 388 <<_:O/binary, "\\n", _/binary>> ->
391 tokenize_string(B, ?ADV_COL(S, 2), [$\n | Acc]); 389 tokenize_string(B, ?ADV_COL(S, 2), [$\n | Acc]);
392 <<_:O/binary, "\\r", _/binary>> -> 390 <<_:O/binary, "\\r", _/binary>> ->
393 tokenize_string(B, ?ADV_COL(S, 2), [$\r | Acc]); 391 tokenize_string(B, ?ADV_COL(S, 2), [$\r | Acc]);
394 <<_:O/binary, "\\t", _/binary>> -> 392 <<_:O/binary, "\\t", _/binary>> ->
395 tokenize_string(B, ?ADV_COL(S, 2), [$\t | Acc]); 393 tokenize_string(B, ?ADV_COL(S, 2), [$\t | Acc]);
396 <<_:O/binary, "\\u", C3, C2, C1, C0, Rest/binary>> -> 394 <<_:O/binary, "\\u", C3, C2, C1, C0, Rest/binary>> ->
397 C = erlang:list_to_integer([C3, C2, C1, C0], 16), 395 C = erlang:list_to_integer([C3, C2, C1, C0], 16),
398 if C > 16#D7FF, C < 16#DC00 -> 396 if C > 16#D7FF, C < 16#DC00 ->
399 %% coalesce UTF-16 surrogate pair 397 %% coalesce UTF-16 surrogate pair
400 <<"\\u", D3, D2, D1, D0, _/binary>> = Rest, 398 <<"\\u", D3, D2, D1, D0, _/binary>> = Rest,
401 D = erlang:list_to_integer([D3,D2,D1,D0], 16), 399 D = erlang:list_to_integer([D3,D2,D1,D0], 16),
402 [CodePoint] = xmerl_ucs:from_utf16be(<<C:16/big-unsigned-integer, 400 [CodePoint] = xmerl_ucs:from_utf16be(<<C:16/big-unsigned-integer,
403 D:16/big-unsigned-integer>>), 401 D:16/big-unsigned-integer>>),
404 Acc1 = lists:reverse(xmerl_ucs:to_utf8(CodePoint), Acc), 402 Acc1 = lists:reverse(xmerl_ucs:to_utf8(CodePoint), Acc),
405 tokenize_string(B, ?ADV_COL(S, 12), Acc1); 403 tokenize_string(B, ?ADV_COL(S, 12), Acc1);
406 true -> 404 true ->
407 Acc1 = lists:reverse(xmerl_ucs:to_utf8(C), Acc), 405 Acc1 = lists:reverse(xmerl_ucs:to_utf8(C), Acc),
408 tokenize_string(B, ?ADV_COL(S, 6), Acc1) 406 tokenize_string(B, ?ADV_COL(S, 6), Acc1)
409 end; 407 end;
410 <<_:O/binary, C, _/binary>> -> 408 <<_:O/binary, C, _/binary>> ->
411 tokenize_string(B, ?INC_CHAR(S, C), [C | Acc]) 409 tokenize_string(B, ?INC_CHAR(S, C), [C | Acc])
412 end. 410 end.
413 411
414 tokenize_number(B, S) -> 412 tokenize_number(B, S) ->
415 case tokenize_number(B, sign, S, []) of 413 case tokenize_number(B, sign, S, []) of
416 {{int, Int}, S1} -> 414 {{int, Int}, S1} ->
417 {{const, list_to_integer(Int)}, S1}; 415 {{const, list_to_integer(Int)}, S1};
418 {{float, Float}, S1} -> 416 {{float, Float}, S1} ->
419 {{const, list_to_float(Float)}, S1} 417 {{const, list_to_float(Float)}, S1}
420 end. 418 end.
421 419
422 tokenize_number(B, sign, S=#decoder{offset=O}, []) -> 420 tokenize_number(B, sign, S=#decoder{offset=O}, []) ->
423 case B of 421 case B of
424 <<_:O/binary, $-, _/binary>> -> 422 <<_:O/binary, $-, _/binary>> ->
425 tokenize_number(B, int, ?INC_COL(S), [$-]); 423 tokenize_number(B, int, ?INC_COL(S), [$-]);
426 _ -> 424 _ ->
427 tokenize_number(B, int, S, []) 425 tokenize_number(B, int, S, [])
428 end; 426 end;
429 tokenize_number(B, int, S=#decoder{offset=O}, Acc) -> 427 tokenize_number(B, int, S=#decoder{offset=O}, Acc) ->
430 case B of 428 case B of
431 <<_:O/binary, $0, _/binary>> -> 429 <<_:O/binary, $0, _/binary>> ->
432 tokenize_number(B, frac, ?INC_COL(S), [$0 | Acc]); 430 tokenize_number(B, frac, ?INC_COL(S), [$0 | Acc]);
433 <<_:O/binary, C, _/binary>> when C >= $1 andalso C =< $9 -> 431 <<_:O/binary, C, _/binary>> when C >= $1 andalso C =< $9 ->
434 tokenize_number(B, int1, ?INC_COL(S), [C | Acc]) 432 tokenize_number(B, int1, ?INC_COL(S), [C | Acc])
435 end; 433 end;
436 tokenize_number(B, int1, S=#decoder{offset=O}, Acc) -> 434 tokenize_number(B, int1, S=#decoder{offset=O}, Acc) ->
437 case B of 435 case B of
438 <<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 -> 436 <<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 ->
439 tokenize_number(B, int1, ?INC_COL(S), [C | Acc]); 437 tokenize_number(B, int1, ?INC_COL(S), [C | Acc]);
440 _ -> 438 _ ->
441 tokenize_number(B, frac, S, Acc) 439 tokenize_number(B, frac, S, Acc)
442 end; 440 end;
443 tokenize_number(B, frac, S=#decoder{offset=O}, Acc) -> 441 tokenize_number(B, frac, S=#decoder{offset=O}, Acc) ->
444 case B of 442 case B of
445 <<_:O/binary, $., C, _/binary>> when C >= $0, C =< $9 -> 443 <<_:O/binary, $., C, _/binary>> when C >= $0, C =< $9 ->
446 tokenize_number(B, frac1, ?ADV_COL(S, 2), [C, $. | Acc]); 444 tokenize_number(B, frac1, ?ADV_COL(S, 2), [C, $. | Acc]);
447 <<_:O/binary, E, _/binary>> when E =:= $e orelse E =:= $E -> 445 <<_:O/binary, E, _/binary>> when E =:= $e orelse E =:= $E ->
448 tokenize_number(B, esign, ?INC_COL(S), [$e, $0, $. | Acc]); 446 tokenize_number(B, esign, ?INC_COL(S), [$e, $0, $. | Acc]);
449 _ -> 447 _ ->
450 {{int, lists:reverse(Acc)}, S} 448 {{int, lists:reverse(Acc)}, S}
451 end; 449 end;
452 tokenize_number(B, frac1, S=#decoder{offset=O}, Acc) -> 450 tokenize_number(B, frac1, S=#decoder{offset=O}, Acc) ->
453 case B of 451 case B of
454 <<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 -> 452 <<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 ->
455 tokenize_number(B, frac1, ?INC_COL(S), [C | Acc]); 453 tokenize_number(B, frac1, ?INC_COL(S), [C | Acc]);
456 <<_:O/binary, E, _/binary>> when E =:= $e orelse E =:= $E -> 454 <<_:O/binary, E, _/binary>> when E =:= $e orelse E =:= $E ->
457 tokenize_number(B, esign, ?INC_COL(S), [$e | Acc]); 455 tokenize_number(B, esign, ?INC_COL(S), [$e | Acc]);
458 _ -> 456 _ ->
459 {{float, lists:reverse(Acc)}, S} 457 {{float, lists:reverse(Acc)}, S}
460 end; 458 end;
461 tokenize_number(B, esign, S=#decoder{offset=O}, Acc) -> 459 tokenize_number(B, esign, S=#decoder{offset=O}, Acc) ->
462 case B of 460 case B of
463 <<_:O/binary, C, _/binary>> when C =:= $- orelse C=:= $+ -> 461 <<_:O/binary, C, _/binary>> when C =:= $- orelse C=:= $+ ->
464 tokenize_number(B, eint, ?INC_COL(S), [C | Acc]); 462 tokenize_number(B, eint, ?INC_COL(S), [C | Acc]);
465 _ -> 463 _ ->
466 tokenize_number(B, eint, S, Acc) 464 tokenize_number(B, eint, S, Acc)
467 end; 465 end;
468 tokenize_number(B, eint, S=#decoder{offset=O}, Acc) -> 466 tokenize_number(B, eint, S=#decoder{offset=O}, Acc) ->
469 case B of 467 case B of
470 <<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 -> 468 <<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 ->
471 tokenize_number(B, eint1, ?INC_COL(S), [C | Acc]) 469 tokenize_number(B, eint1, ?INC_COL(S), [C | Acc])
472 end; 470 end;
473 tokenize_number(B, eint1, S=#decoder{offset=O}, Acc) -> 471 tokenize_number(B, eint1, S=#decoder{offset=O}, Acc) ->
474 case B of 472 case B of
475 <<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 -> 473 <<_:O/binary, C, _/binary>> when C >= $0 andalso C =< $9 ->
476 tokenize_number(B, eint1, ?INC_COL(S), [C | Acc]); 474 tokenize_number(B, eint1, ?INC_COL(S), [C | Acc]);
477 _ -> 475 _ ->
478 {{float, lists:reverse(Acc)}, S} 476 {{float, lists:reverse(Acc)}, S}
479 end. 477 end.
480 478
481 tokenize(B, S=#decoder{offset=O}) -> 479 tokenize(B, S=#decoder{offset=O}) ->
482 case B of 480 case B of
483 <<_:O/binary, C, _/binary>> when ?IS_WHITESPACE(C) -> 481 <<_:O/binary, C, _/binary>> when ?IS_WHITESPACE(C) ->
484 tokenize(B, ?INC_CHAR(S, C)); 482 tokenize(B, ?INC_CHAR(S, C));
485 <<_:O/binary, "{", _/binary>> -> 483 <<_:O/binary, "{", _/binary>> ->
486 {start_object, ?INC_COL(S)}; 484 {start_object, ?INC_COL(S)};
487 <<_:O/binary, "}", _/binary>> -> 485 <<_:O/binary, "}", _/binary>> ->
488 {end_object, ?INC_COL(S)}; 486 {end_object, ?INC_COL(S)};
489 <<_:O/binary, "[", _/binary>> -> 487 <<_:O/binary, "[", _/binary>> ->
490 {start_array, ?INC_COL(S)}; 488 {start_array, ?INC_COL(S)};
491 <<_:O/binary, "]", _/binary>> -> 489 <<_:O/binary, "]", _/binary>> ->
492 {end_array, ?INC_COL(S)}; 490 {end_array, ?INC_COL(S)};
493 <<_:O/binary, ",", _/binary>> -> 491 <<_:O/binary, ",", _/binary>> ->
494 {comma, ?INC_COL(S)}; 492 {comma, ?INC_COL(S)};
495 <<_:O/binary, ":", _/binary>> -> 493 <<_:O/binary, ":", _/binary>> ->
496 {colon, ?INC_COL(S)}; 494 {colon, ?INC_COL(S)};
497 <<_:O/binary, "null", _/binary>> -> 495 <<_:O/binary, "null", _/binary>> ->
498 {{const, null}, ?ADV_COL(S, 4)}; 496 {{const, null}, ?ADV_COL(S, 4)};
499 <<_:O/binary, "true", _/binary>> -> 497 <<_:O/binary, "true", _/binary>> ->
500 {{const, true}, ?ADV_COL(S, 4)}; 498 {{const, true}, ?ADV_COL(S, 4)};
501 <<_:O/binary, "false", _/binary>> -> 499 <<_:O/binary, "false", _/binary>> ->
502 {{const, false}, ?ADV_COL(S, 5)}; 500 {{const, false}, ?ADV_COL(S, 5)};
503 <<_:O/binary, "\"", _/binary>> -> 501 <<_:O/binary, "\"", _/binary>> ->
504 tokenize_string(B, ?INC_COL(S)); 502 tokenize_string(B, ?INC_COL(S));
505 <<_:O/binary, C, _/binary>> when (C >= $0 andalso C =< $9) 503 <<_:O/binary, C, _/binary>> when (C >= $0 andalso C =< $9)
506 orelse C =:= $- -> 504 orelse C =:= $- ->
507 tokenize_number(B, S); 505 tokenize_number(B, S);
508 <<_:O/binary>> -> 506 <<_:O/binary>> ->
509 trim = S#decoder.state, 507 trim = S#decoder.state,
510 {eof, S} 508 {eof, S}
511 end. 509 end.
512 %% 510 %%
513 %% Tests 511 %% Tests
514 %% 512 %%
515 -include_lib("eunit/include/eunit.hrl"). 513 -include_lib("eunit/include/eunit.hrl").
516 -ifdef(TEST). 514 -ifdef(TEST).
517 515
518 516
519 %% testing constructs borrowed from the Yaws JSON implementation. 517 %% testing constructs borrowed from the Yaws JSON implementation.
520 518
521 %% Create an object from a list of Key/Value pairs. 519 %% Create an object from a list of Key/Value pairs.
522 520
523 obj_new() -> 521 obj_new() ->
524 {struct, []}. 522 {struct, []}.
525 523
526 is_obj({struct, Props}) -> 524 is_obj({struct, Props}) ->
527 F = fun ({K, _}) when is_binary(K) -> 525 F = fun ({K, _}) when is_binary(K) -> true end,
528 true;
529 (_) ->
530 false
531 end,
532 lists:all(F, Props). 526 lists:all(F, Props).
533 527
534 obj_from_list(Props) -> 528 obj_from_list(Props) ->
535 Obj = {struct, Props}, 529 Obj = {struct, Props},
536 case is_obj(Obj) of 530 ?assert(is_obj(Obj)),
537 true -> Obj; 531 Obj.
538 false -> exit({json_bad_object, Obj})
539 end.
540 532
541 %% Test for equivalence of Erlang terms. 533 %% Test for equivalence of Erlang terms.
542 %% Due to arbitrary order of construction, equivalent objects might 534 %% Due to arbitrary order of construction, equivalent objects might
543 %% compare unequal as erlang terms, so we need to carefully recurse 535 %% compare unequal as erlang terms, so we need to carefully recurse
544 %% through aggregates (tuples and objects). 536 %% through aggregates (tuples and objects).
545 537
546 equiv({struct, Props1}, {struct, Props2}) -> 538 equiv({struct, Props1}, {struct, Props2}) ->
547 equiv_object(Props1, Props2); 539 equiv_object(Props1, Props2);
548 equiv(L1, L2) when is_list(L1), is_list(L2) -> 540 equiv(L1, L2) when is_list(L1), is_list(L2) ->
549 equiv_list(L1, L2); 541 equiv_list(L1, L2);
550 equiv(N1, N2) when is_number(N1), is_number(N2) -> N1 == N2; 542 equiv(N1, N2) when is_number(N1), is_number(N2) -> N1 == N2;
551 equiv(B1, B2) when is_binary(B1), is_binary(B2) -> B1 == B2; 543 equiv(B1, B2) when is_binary(B1), is_binary(B2) -> B1 == B2;
552 equiv(true, true) -> true; 544 equiv(A, A) when A =:= true orelse A =:= false orelse A =:= null -> true.
553 equiv(false, false) -> true;
554 equiv(null, null) -> true.
555 545
556 %% Object representation and traversal order is unknown. 546 %% Object representation and traversal order is unknown.
557 %% Use the sledgehammer and sort property lists. 547 %% Use the sledgehammer and sort property lists.
558 548
559 equiv_object(Props1, Props2) -> 549 equiv_object(Props1, Props2) ->
560 L1 = lists:keysort(1, Props1), 550 L1 = lists:keysort(1, Props1),
561 L2 = lists:keysort(1, Props2), 551 L2 = lists:keysort(1, Props2),
562 Pairs = lists:zip(L1, L2), 552 Pairs = lists:zip(L1, L2),
563 true = lists:all(fun({{K1, V1}, {K2, V2}}) -> 553 true = lists:all(fun({{K1, V1}, {K2, V2}}) ->
564 equiv(K1, K2) and equiv(V1, V2) 554 equiv(K1, K2) and equiv(V1, V2)
565 end, Pairs). 555 end, Pairs).
566 556
567 %% Recursively compare tuple elements for equivalence. 557 %% Recursively compare tuple elements for equivalence.
568 558
569 equiv_list([], []) -> 559 equiv_list([], []) ->
570 true; 560 true;
571 equiv_list([V1 | L1], [V2 | L2]) -> 561 equiv_list([V1 | L1], [V2 | L2]) ->
572 equiv(V1, V2) andalso equiv_list(L1, L2). 562 equiv(V1, V2) andalso equiv_list(L1, L2).
573 563
574 decode_test() -> 564 decode_test() ->
575 [1199344435545.0, 1] = decode(<<"[1199344435545.0,1]">>), 565 [1199344435545.0, 1] = decode(<<"[1199344435545.0,1]">>),
576 <<16#F0,16#9D,16#9C,16#95>> = decode([34,"\\ud835","\\udf15",34]). 566 <<16#F0,16#9D,16#9C,16#95>> = decode([34,"\\ud835","\\udf15",34]).
577 567
578 e2j_vec_test() -> 568 e2j_vec_test() ->
579 test_one(e2j_test_vec(utf8), 1). 569 test_one(e2j_test_vec(utf8), 1).
580 570
581 test_one([], _N) -> 571 test_one([], _N) ->
582 %% io:format("~p tests passed~n", [N-1]), 572 %% io:format("~p tests passed~n", [N-1]),
583 ok; 573 ok;
584 test_one([{E, J} | Rest], N) -> 574 test_one([{E, J} | Rest], N) ->
585 %% io:format("[~p] ~p ~p~n", [N, E, J]), 575 %% io:format("[~p] ~p ~p~n", [N, E, J]),
586 true = equiv(E, decode(J)), 576 true = equiv(E, decode(J)),
587 true = equiv(E, decode(encode(E))), 577 true = equiv(E, decode(encode(E))),
588 test_one(Rest, 1+N). 578 test_one(Rest, 1+N).
589 579
590 e2j_test_vec(utf8) -> 580 e2j_test_vec(utf8) ->
591 [ 581 [
592 {1, "1"}, 582 {1, "1"},
593 {3.1416, "3.14160"}, %% text representation may truncate, trail zeroes 583 {3.1416, "3.14160"}, %% text representation may truncate, trail zeroes
594 {-1, "-1"}, 584 {-1, "-1"},
595 {-3.1416, "-3.14160"}, 585 {-3.1416, "-3.14160"},
596 {12.0e10, "1.20000e+11"}, 586 {12.0e10, "1.20000e+11"},
597 {1.234E+10, "1.23400e+10"}, 587 {1.234E+10, "1.23400e+10"},
598 {-1.234E-10, "-1.23400e-10"}, 588 {-1.234E-10, "-1.23400e-10"},
599 {10.0, "1.0e+01"}, 589 {10.0, "1.0e+01"},
600 {123.456, "1.23456E+2"}, 590 {123.456, "1.23456E+2"},
601 {10.0, "1e1"}, 591 {10.0, "1e1"},
602 {<<"foo">>, "\"foo\""}, 592 {<<"foo">>, "\"foo\""},
603 {<<"foo", 5, "bar">>, "\"foo\\u0005bar\""}, 593 {<<"foo", 5, "bar">>, "\"foo\\u0005bar\""},
604 {<<"">>, "\"\""}, 594 {<<"">>, "\"\""},
605 {<<"\n\n\n">>, "\"\\n\\n\\n\""}, 595 {<<"\n\n\n">>, "\"\\n\\n\\n\""},
606 {<<"\" \b\f\r\n\t\"">>, "\"\\\" \\b\\f\\r\\n\\t\\\"\""}, 596 {<<"\" \b\f\r\n\t\"">>, "\"\\\" \\b\\f\\r\\n\\t\\\"\""},
607 {obj_new(), "{}"}, 597 {obj_new(), "{}"},
608 {obj_from_list([{<<"foo">>, <<"bar">>}]), "{\"foo\":\"bar\"}"}, 598 {obj_from_list([{<<"foo">>, <<"bar">>}]), "{\"foo\":\"bar\"}"},
609 {obj_from_list([{<<"foo">>, <<"bar">>}, {<<"baz">>, 123}]), 599 {obj_from_list([{<<"foo">>, <<"bar">>}, {<<"baz">>, 123}]),
610 "{\"foo\":\"bar\",\"baz\":123}"}, 600 "{\"foo\":\"bar\",\"baz\":123}"},
611 {[], "[]"}, 601 {[], "[]"},
612 {[[]], "[[]]"}, 602 {[[]], "[[]]"},
613 {[1, <<"foo">>], "[1,\"foo\"]"}, 603 {[1, <<"foo">>], "[1,\"foo\"]"},
614 604
615 %% json array in a json object 605 %% json array in a json object
616 {obj_from_list([{<<"foo">>, [123]}]), 606 {obj_from_list([{<<"foo">>, [123]}]),
617 "{\"foo\":[123]}"}, 607 "{\"foo\":[123]}"},
618 608
619 %% json object in a json object 609 %% json object in a json object
620 {obj_from_list([{<<"foo">>, obj_from_list([{<<"bar">>, true}])}]), 610 {obj_from_list([{<<"foo">>, obj_from_list([{<<"bar">>, true}])}]),
621 "{\"foo\":{\"bar\":true}}"}, 611 "{\"foo\":{\"bar\":true}}"},
622 612
623 %% fold evaluation order 613 %% fold evaluation order
624 {obj_from_list([{<<"foo">>, []}, 614 {obj_from_list([{<<"foo">>, []},
625 {<<"bar">>, obj_from_list([{<<"baz">>, true}])}, 615 {<<"bar">>, obj_from_list([{<<"baz">>, true}])},
626 {<<"alice">>, <<"bob">>}]), 616 {<<"alice">>, <<"bob">>}]),
627 "{\"foo\":[],\"bar\":{\"baz\":true},\"alice\":\"bob\"}"}, 617 "{\"foo\":[],\"bar\":{\"baz\":true},\"alice\":\"bob\"}"},
628 618
629 %% json object in a json array 619 %% json object in a json array
630 {[-123, <<"foo">>, obj_from_list([{<<"bar">>, []}]), null], 620 {[-123, <<"foo">>, obj_from_list([{<<"bar">>, []}]), null],
631 "[-123,\"foo\",{\"bar\":[]},null]"} 621 "[-123,\"foo\",{\"bar\":[]},null]"}
632 ]. 622 ].
633 623
634 %% test utf8 encoding 624 %% test utf8 encoding
635 encoder_utf8_test() -> 625 encoder_utf8_test() ->
636 %% safe conversion case (default) 626 %% safe conversion case (default)
637 [34,"\\u0001","\\u0442","\\u0435","\\u0441","\\u0442",34] = 627 [34,"\\u0001","\\u0442","\\u0435","\\u0441","\\u0442",34] =
638 encode(<<1,"\321\202\320\265\321\201\321\202">>), 628 encode(<<1,"\321\202\320\265\321\201\321\202">>),
639 629
640 %% raw utf8 output (optional) 630 %% raw utf8 output (optional)
641 Enc = mochijson2:encoder([{utf8, true}]), 631 Enc = mochijson2:encoder([{utf8, true}]),
642 [34,"\\u0001",[209,130],[208,181],[209,129],[209,130],34] = 632 [34,"\\u0001",[209,130],[208,181],[209,129],[209,130],34] =
643 Enc(<<1,"\321\202\320\265\321\201\321\202">>). 633 Enc(<<1,"\321\202\320\265\321\201\321\202">>).
644 634
645 input_validation_test() -> 635 input_validation_test() ->
646 Good = [ 636 Good = [
647 {16#00A3, <<?Q, 16#C2, 16#A3, ?Q>>}, %% pound 637 {16#00A3, <<?Q, 16#C2, 16#A3, ?Q>>}, %% pound
648 {16#20AC, <<?Q, 16#E2, 16#82, 16#AC, ?Q>>}, %% euro 638 {16#20AC, <<?Q, 16#E2, 16#82, 16#AC, ?Q>>}, %% euro
649 {16#10196, <<?Q, 16#F0, 16#90, 16#86, 16#96, ?Q>>} %% denarius 639 {16#10196, <<?Q, 16#F0, 16#90, 16#86, 16#96, ?Q>>} %% denarius
650 ], 640 ],
651 lists:foreach(fun({CodePoint, UTF8}) -> 641 lists:foreach(fun({CodePoint, UTF8}) ->
652 Expect = list_to_binary(xmerl_ucs:to_utf8(CodePoint)), 642 Expect = list_to_binary(xmerl_ucs:to_utf8(CodePoint)),
653 Expect = decode(UTF8) 643 Expect = decode(UTF8)
654 end, Good), 644 end, Good),
655 645
656 Bad = [ 646 Bad = [
657 %% 2nd, 3rd, or 4th byte of a multi-byte sequence w/o leading byte 647 %% 2nd, 3rd, or 4th byte of a multi-byte sequence w/o leading byte
658 <<?Q, 16#80, ?Q>>, 648 <<?Q, 16#80, ?Q>>,
659 %% missing continuations, last byte in each should be 80-BF 649 %% missing continuations, last byte in each should be 80-BF
660 <<?Q, 16#C2, 16#7F, ?Q>>, 650 <<?Q, 16#C2, 16#7F, ?Q>>,
661 <<?Q, 16#E0, 16#80,16#7F, ?Q>>, 651 <<?Q, 16#E0, 16#80,16#7F, ?Q>>,
662 <<?Q, 16#F0, 16#80, 16#80, 16#7F, ?Q>>, 652 <<?Q, 16#F0, 16#80, 16#80, 16#7F, ?Q>>,
663 %% we don't support code points > 10FFFF per RFC 3629 653 %% we don't support code points > 10FFFF per RFC 3629
664 <<?Q, 16#F5, 16#80, 16#80, 16#80, ?Q>> 654 <<?Q, 16#F5, 16#80, 16#80, 16#80, ?Q>>
665 ], 655 ],
666 lists:foreach(fun(X) -> 656 lists:foreach(
667 ok = try decode(X) catch invalid_utf8 -> ok end 657 fun(X) ->
668 end, Bad). 658 ok = try decode(X) catch invalid_utf8 -> ok end,
659 %% could be {ucs,{bad_utf8_character_code}} or
660 %% {json_encode,{bad_char,_}}
661 {'EXIT', _} = (catch encode(X))
662 end, Bad).
669 663
670 inline_json_test() -> 664 inline_json_test() ->
671 ?assertEqual(<<"\"iodata iodata\"">>, 665 ?assertEqual(<<"\"iodata iodata\"">>,
672 iolist_to_binary( 666 iolist_to_binary(
673 encode({json, [<<"\"iodata">>, " iodata\""]}))), 667 encode({json, [<<"\"iodata">>, " iodata\""]}))),
674 ?assertEqual({struct, [{<<"key">>, <<"iodata iodata">>}]}, 668 ?assertEqual({struct, [{<<"key">>, <<"iodata iodata">>}]},
675 decode( 669 decode(
676 encode({struct, 670 encode({struct,
677 [{key, {json, [<<"\"iodata">>, " iodata\""]}}]}))), 671 [{key, {json, [<<"\"iodata">>, " iodata\""]}}]}))),
678 ok. 672 ok.
679 673
680 big_unicode_test() -> 674 big_unicode_test() ->
681 UTF8Seq = list_to_binary(xmerl_ucs:to_utf8(16#0001d120)), 675 UTF8Seq = list_to_binary(xmerl_ucs:to_utf8(16#0001d120)),
682 ?assertEqual( 676 ?assertEqual(
683 <<"\"\\ud834\\udd20\"">>, 677 <<"\"\\ud834\\udd20\"">>,
684 iolist_to_binary(encode(UTF8Seq))), 678 iolist_to_binary(encode(UTF8Seq))),
685 ?assertEqual( 679 ?assertEqual(
686 UTF8Seq, 680 UTF8Seq,
687 decode(iolist_to_binary(encode(UTF8Seq)))), 681 decode(iolist_to_binary(encode(UTF8Seq)))),
688 ok. 682 ok.
689 683
690 custom_decoder_test() -> 684 custom_decoder_test() ->
691 ?assertEqual( 685 ?assertEqual(
692 {struct, [{<<"key">>, <<"value">>}]}, 686 {struct, [{<<"key">>, <<"value">>}]},
693 (decoder([]))("{\"key\": \"value\"}")), 687 (decoder([]))("{\"key\": \"value\"}")),
694 F = fun ({struct, [{<<"key">>, <<"value">>}]}) -> win end, 688 F = fun ({struct, [{<<"key">>, <<"value">>}]}) -> win end,
695 ?assertEqual( 689 ?assertEqual(
696 win, 690 win,
697 (decoder([{object_hook, F}]))("{\"key\": \"value\"}")), 691 (decoder([{object_hook, F}]))("{\"key\": \"value\"}")),
698 ok. 692 ok.
699 693
700 atom_test() -> 694 atom_test() ->
701 %% JSON native atoms 695 %% JSON native atoms
702 [begin 696 [begin
703 ?assertEqual(A, decode(atom_to_list(A))), 697 ?assertEqual(A, decode(atom_to_list(A))),
704 ?assertEqual(iolist_to_binary(atom_to_list(A)), 698 ?assertEqual(iolist_to_binary(atom_to_list(A)),
705 iolist_to_binary(encode(A))) 699 iolist_to_binary(encode(A)))
706 end || A <- [true, false, null]], 700 end || A <- [true, false, null]],
707 %% Atom to string 701 %% Atom to string
708 ?assertEqual( 702 ?assertEqual(
709 <<"\"foo\"">>, 703 <<"\"foo\"">>,
710 iolist_to_binary(encode(foo))), 704 iolist_to_binary(encode(foo))),
711 ?assertEqual( 705 ?assertEqual(
712 <<"\"\\ud834\\udd20\"">>, 706 <<"\"\\ud834\\udd20\"">>,
713 iolist_to_binary(encode(list_to_atom(xmerl_ucs:to_utf8(16#0001d120))))), 707 iolist_to_binary(encode(list_to_atom(xmerl_ucs:to_utf8(16#0001d120))))),
714 ok. 708 ok.
715 709
716 key_encode_test() -> 710 key_encode_test() ->
717 %% Some forms are accepted as keys that would not be strings in other 711 %% Some forms are accepted as keys that would not be strings in other
718 %% cases 712 %% cases
719 ?assertEqual( 713 ?assertEqual(
720 <<"{\"foo\":1}">>, 714 <<"{\"foo\":1}">>,
721 iolist_to_binary(encode({struct, [{foo, 1}]}))), 715 iolist_to_binary(encode({struct, [{foo, 1}]}))),
722 ?assertEqual( 716 ?assertEqual(
723 <<"{\"foo\":1}">>, 717 <<"{\"foo\":1}">>,
724 iolist_to_binary(encode({struct, [{<<"foo">>, 1}]}))), 718 iolist_to_binary(encode({struct, [{<<"foo">>, 1}]}))),
725 ?assertEqual( 719 ?assertEqual(
726 <<"{\"foo\":1}">>, 720 <<"{\"foo\":1}">>,
727 iolist_to_binary(encode({struct, [{"foo", 1}]}))), 721 iolist_to_binary(encode({struct, [{"foo", 1}]}))),
728 ?assertEqual( 722 ?assertEqual(
723 <<"{\"\\ud834\\udd20\":1}">>,
724 iolist_to_binary(
725 encode({struct, [{[16#0001d120], 1}]}))),
726 ?assertEqual(
729 <<"{\"1\":1}">>, 727 <<"{\"1\":1}">>,
730 iolist_to_binary(encode({struct, [{1, 1}]}))), 728 iolist_to_binary(encode({struct, [{1, 1}]}))),
731 ok. 729 ok.
732 730
731 unsafe_chars_test() ->
732 Chars = "\"\\\b\f\n\r\t",
733 [begin
734 ?assertEqual(false, json_string_is_safe([C])),
735 ?assertEqual(false, json_bin_is_safe(<<C>>)),
736 ?assertEqual(<<C>>, decode(encode(<<C>>)))
737 end || C <- Chars],
738 ?assertEqual(
739 false,
740 json_string_is_safe([16#0001d120])),
741 ?assertEqual(
742 false,
743 json_bin_is_safe(list_to_binary(xmerl_ucs:to_utf8(16#0001d120)))),
744 ?assertEqual(
745 [16#0001d120],
746 xmerl_ucs:from_utf8(
747 binary_to_list(
748 decode(encode(list_to_atom(xmerl_ucs:to_utf8(16#0001d120))))))),
749 ?assertEqual(
750 false,
751 json_string_is_safe([16#110000])),
752 ?assertEqual(
753 false,
754 json_bin_is_safe(list_to_binary(xmerl_ucs:to_utf8([16#110000])))),
755 %% solidus can be escaped but isn't unsafe by default
756 ?assertEqual(
757 <<"/">>,
758 decode(<<"\"\\/\"">>)),
759 ok.
760
761 int_test() ->
762 ?assertEqual(0, decode("0")),
763 ?assertEqual(1, decode("1")),
764 ?assertEqual(11, decode("11")),
765 ok.
766
733 float_fallback_test() -> 767 float_fallback_test() ->
734 ?assertEqual(<<"-2147483649.0">>, iolist_to_binary(encode(-2147483649))), 768 ?assertEqual(<<"-2147483649.0">>, iolist_to_binary(encode(-2147483649))),
735 ?assertEqual(<<"2147483648.0">>, iolist_to_binary(encode(2147483648))), 769 ?assertEqual(<<"2147483648.0">>, iolist_to_binary(encode(2147483648))),
736 ok. 770 ok.
737 771
772 handler_test() ->
773 ?assertEqual(
774 {'EXIT',{json_encode,{bad_term,{}}}},
775 catch encode({})),
776 F = fun ({}) -> [] end,
777 ?assertEqual(
778 <<"[]">>,
779 iolist_to_binary((encoder([{handler, F}]))({}))),
780 ok.
781
738 -endif. 782 -endif.
Powered by Google Project Hosting