diff --git a/Doc/library/json.rst b/Doc/library/json.rst index 4a26419e65bee4..72632a8ef53d5b 100644 --- a/Doc/library/json.rst +++ b/Doc/library/json.rst @@ -264,7 +264,7 @@ Basic Usage .. function:: load(fp, *, cls=None, object_hook=None, parse_float=None, \ parse_int=None, parse_constant=None, \ - object_pairs_hook=None, **kw) + object_pairs_hook=None, array_hook=None, **kw) Deserialize *fp* to a Python object using the :ref:`JSON-to-Python conversion table <json-to-py-table>`. @@ -301,6 +301,15 @@ Basic Usage Default ``None``. :type object_pairs_hook: :term:`callable` | None + :param array_hook: + If set, a function that is called with the result of + any JSON array literal decoded as a Python list. + The return value of this function will be used + instead of the :class:`list`. + This feature can be used to implement custom decoders. + Default ``None``. + :type array_hook: :term:`callable` | None + :param parse_float: If set, a function that is called with the string of every JSON float to be decoded. @@ -349,7 +358,10 @@ Basic Usage conversion length limitation <int_max_str_digits>` to help avoid denial of service attacks. -.. function:: loads(s, *, cls=None, object_hook=None, parse_float=None, parse_int=None, parse_constant=None, object_pairs_hook=None, **kw) + .. versionchanged:: next + Added the optional *array_hook* parameter. + +.. function:: loads(s, *, cls=None, object_hook=None, parse_float=None, parse_int=None, parse_constant=None, object_pairs_hook=None, array_hook=None, **kw) Identical to :func:`load`, but instead of a file-like object, deserialize *s* (a :class:`str`, :class:`bytes` or :class:`bytearray` @@ -367,7 +379,7 @@ Basic Usage Encoders and Decoders --------------------- -.. class:: JSONDecoder(*, object_hook=None, parse_float=None, parse_int=None, parse_constant=None, strict=True, object_pairs_hook=None) +..
class:: JSONDecoder(*, object_hook=None, parse_float=None, parse_int=None, parse_constant=None, strict=True, object_pairs_hook=None, array_hook=None) Simple JSON decoder. @@ -412,6 +424,14 @@ Encoders and Decoders .. versionchanged:: 3.1 Added support for *object_pairs_hook*. + *array_hook* is an optional function that will be called with the + result of every JSON array decoded as a list. The return value of + *array_hook* will be used instead of the :class:`list`. This feature can be + used to implement custom decoders. + + .. versionchanged:: next + Added support for *array_hook*. + *parse_float* is an optional function that will be called with the string of every JSON float to be decoded. By default, this is equivalent to ``float(num_str)``. This can be used to use another datatype or parser for diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 76e97cf4b55595..b27e820f4a2da8 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -801,6 +801,17 @@ inspect for :func:`~inspect.getdoc`. (Contributed by Serhiy Storchaka in :gh:`132686`.) +json +---- + +* Add the *array_hook* parameter to the :func:`~json.load` and + :func:`~json.loads` functions: + it allows a callback for JSON array literals to customize the Python lists in + the resulting decoded object. Passing :class:`frozendict` as + *object_pairs_hook* combined with :class:`tuple` as *array_hook* yields a + deeply nested immutable Python structure representing the JSON data. + (Contributed by Joao S. O.
Bueno in :gh:`146440`.) + locale ------ diff --git a/Lib/json/__init__.py b/Lib/json/__init__.py index 89396b25a2cbb3..251025efac14b8 100644 --- a/Lib/json/__init__.py +++ b/Lib/json/__init__.py @@ -241,7 +241,7 @@ def dumps(obj, *, skipkeys=False, ensure_ascii=True, check_circular=True, **kw).encode(obj) -_default_decoder = JSONDecoder(object_hook=None, object_pairs_hook=None) +_default_decoder = JSONDecoder() def detect_encoding(b): @@ -275,7 +275,8 @@ def detect_encoding(b): def load(fp, *, cls=None, object_hook=None, parse_float=None, - parse_int=None, parse_constant=None, object_pairs_hook=None, **kw): + parse_int=None, parse_constant=None, object_pairs_hook=None, + array_hook=None, **kw): """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing a JSON document) to a Python object. @@ -291,17 +292,26 @@ def load(fp, *, cls=None, object_hook=None, parse_float=None, ``object_hook`` is also defined, the ``object_pairs_hook`` takes priority. + ``array_hook`` is an optional function that will be called with the result + of any literal array decode (a ``list``). The return value of this function will + be used instead of the ``list``. This feature can be used along with + ``object_pairs_hook`` to customize the resulting data structure - for example, + by setting that to ``frozendict`` and ``array_hook`` to ``tuple``, one can get + a deeply immutable data structure from any JSON data. + To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` kwarg; otherwise ``JSONDecoder`` is used.
""" return loads(fp.read(), cls=cls, object_hook=object_hook, parse_float=parse_float, parse_int=parse_int, - parse_constant=parse_constant, object_pairs_hook=object_pairs_hook, **kw) + parse_constant=parse_constant, object_pairs_hook=object_pairs_hook, + array_hook=array_hook, **kw) def loads(s, *, cls=None, object_hook=None, parse_float=None, - parse_int=None, parse_constant=None, object_pairs_hook=None, **kw): + parse_int=None, parse_constant=None, object_pairs_hook=None, + array_hook=None, **kw): """Deserialize ``s`` (a ``str``, ``bytes`` or ``bytearray`` instance containing a JSON document) to a Python object. @@ -317,6 +327,13 @@ def loads(s, *, cls=None, object_hook=None, parse_float=None, ``object_hook`` is also defined, the ``object_pairs_hook`` takes priority. + ``array_hook`` is an optional function that will be called with the result + of any literal array decode (a ``list``). The return value of this function will + be used instead of the ``list``. This feature can be used along with + ``object_pairs_hook`` to customize the resulting data structure - for example, + by setting that to ``frozendict`` and ``array_hook`` to ``tuple``, one can get + a deeply immutable data structure from any JSON data. + ``parse_float``, if specified, will be called with the string of every JSON float to be decoded. By default this is equivalent to float(num_str).
This can be used to use another datatype or parser @@ -347,7 +364,8 @@ def loads(s, *, cls=None, object_hook=None, parse_float=None, if (cls is None and object_hook is None and parse_int is None and parse_float is None and - parse_constant is None and object_pairs_hook is None and not kw): + parse_constant is None and object_pairs_hook is None + and array_hook is None and not kw): return _default_decoder.decode(s) if cls is None: cls = JSONDecoder @@ -355,6 +373,8 @@ def loads(s, *, cls=None, object_hook=None, parse_float=None, kw['object_hook'] = object_hook if object_pairs_hook is not None: kw['object_pairs_hook'] = object_pairs_hook + if array_hook is not None: + kw['array_hook'] = array_hook if parse_float is not None: kw['parse_float'] = parse_float if parse_int is not None: diff --git a/Lib/json/decoder.py b/Lib/json/decoder.py index 4cd6f8367a1349..364e44d40cc307 100644 --- a/Lib/json/decoder.py +++ b/Lib/json/decoder.py @@ -218,7 +218,7 @@ def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook, pairs = object_hook(pairs) return pairs, end -def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): +def JSONArray(s_and_end, scan_once, array_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR): s, end = s_and_end values = [] nextchar = s[end:end + 1] @@ -227,6 +227,8 @@ def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): nextchar = s[end:end + 1] # Look-ahead for trivial empty array if nextchar == ']': + if array_hook is not None: + values = array_hook(values) return values, end + 1 _append = values.append while True: @@ -256,6 +258,8 @@ def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): if nextchar == ']': raise JSONDecodeError("Illegal trailing comma before end of array", s, comma_idx) + if array_hook is not None: + values = array_hook(values) return values, end @@ -291,7 +295,7 @@ class JSONDecoder(object): def __init__(self, *, object_hook=None, parse_float=None, 
parse_int=None, parse_constant=None, strict=True, - object_pairs_hook=None): + object_pairs_hook=None, array_hook=None): """``object_hook``, if specified, will be called with the result of every JSON object decoded and its return value will be used in place of the given ``dict``. This can be used to provide custom @@ -304,6 +308,14 @@ def __init__(self, *, object_hook=None, parse_float=None, If ``object_hook`` is also defined, the ``object_pairs_hook`` takes priority. + ``array_hook`` is an optional function that will be called with the + result of any literal array decode (a ``list``). The return value of + this function will be used instead of the ``list``. This feature can + be used along with ``object_pairs_hook`` to customize the resulting data + structure - for example, by setting that to ``frozendict`` and + ``array_hook`` to ``tuple``, one can get a deeply immutable data + structure from any JSON data. + ``parse_float``, if specified, will be called with the string of every JSON float to be decoded. By default this is equivalent to float(num_str).
This can be used to use another datatype or parser @@ -330,6 +342,7 @@ def __init__(self, *, object_hook=None, parse_float=None, self.parse_constant = parse_constant or _CONSTANTS.__getitem__ self.strict = strict self.object_pairs_hook = object_pairs_hook + self.array_hook = array_hook self.parse_object = JSONObject self.parse_array = JSONArray self.parse_string = scanstring diff --git a/Lib/json/scanner.py b/Lib/json/scanner.py index 090897515fe2f3..b484e00be0fd2a 100644 --- a/Lib/json/scanner.py +++ b/Lib/json/scanner.py @@ -23,6 +23,7 @@ def py_make_scanner(context): parse_constant = context.parse_constant object_hook = context.object_hook object_pairs_hook = context.object_pairs_hook + array_hook = context.array_hook memo = context.memo def _scan_once(string, idx): @@ -37,7 +38,7 @@ def _scan_once(string, idx): return parse_object((string, idx + 1), strict, _scan_once, object_hook, object_pairs_hook, memo) elif nextchar == '[': - return parse_array((string, idx + 1), _scan_once) + return parse_array((string, idx + 1), _scan_once, array_hook) elif nextchar == 'n' and string[idx:idx + 4] == 'null': return None, idx + 4 elif nextchar == 't' and string[idx:idx + 4] == 'true': diff --git a/Lib/test/test_json/test_decode.py b/Lib/test/test_json/test_decode.py index 2250af964c022b..d846c8af7ec434 100644 --- a/Lib/test/test_json/test_decode.py +++ b/Lib/test/test_json/test_decode.py @@ -69,6 +69,24 @@ def test_object_pairs_hook(self): object_pairs_hook=OrderedDict), OrderedDict([('empty', OrderedDict())])) + def test_array_hook(self): + s = '[1, 2, 3]' + t = self.loads(s, array_hook=tuple) + self.assertEqual(t, (1, 2, 3)) + self.assertEqual(type(t), tuple) + + # Nested array in inner structure with object_hook + s = '{"xkd": [[1], [2], [3]]}' + p = frozendict(xkd=((1,), (2,), (3,))) + data = self.loads(s, object_hook=frozendict, array_hook=tuple) + self.assertEqual(data, p) + self.assertEqual(type(data), frozendict) + self.assertEqual(type(data["xkd"]), tuple) + for 
item in data["xkd"]: + self.assertEqual(type(item), tuple) + + self.assertEqual(self.loads('[]', array_hook=tuple), ()) + def test_decoder_optimizations(self): # Several optimizations were made that skip over calls to # the whitespace regex, so this test is designed to try and diff --git a/Misc/NEWS.d/next/Library/2026-03-26-02-06-52.gh-issue-146440.HXjhQO.rst b/Misc/NEWS.d/next/Library/2026-03-26-02-06-52.gh-issue-146440.HXjhQO.rst new file mode 100644 index 00000000000000..231c56fa063e72 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-03-26-02-06-52.gh-issue-146440.HXjhQO.rst @@ -0,0 +1,6 @@ +:mod:`json`: Add the *array_hook* parameter to the :func:`~json.load` and +:func:`~json.loads` functions: +it allows a callback for JSON array literals to customize the Python lists in the +resulting decoded object. Passing :class:`frozendict` as +*object_pairs_hook* combined with :class:`tuple` as *array_hook* yields a +deeply nested immutable Python structure representing the JSON data. diff --git a/Modules/_json.c b/Modules/_json.c index f9c4f06bac7b43..f70c36125081d1 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -30,6 +30,7 @@ typedef struct _PyScannerObject { signed char strict; PyObject *object_hook; PyObject *object_pairs_hook; + PyObject *array_hook; PyObject *parse_float; PyObject *parse_int; PyObject *parse_constant; @@ -41,6 +42,7 @@ static PyMemberDef scanner_members[] = { {"strict", Py_T_BOOL, offsetof(PyScannerObject, strict), Py_READONLY, "strict"}, {"object_hook", _Py_T_OBJECT, offsetof(PyScannerObject, object_hook), Py_READONLY, "object_hook"}, {"object_pairs_hook", _Py_T_OBJECT, offsetof(PyScannerObject, object_pairs_hook), Py_READONLY}, + {"array_hook", _Py_T_OBJECT, offsetof(PyScannerObject, array_hook), Py_READONLY}, {"parse_float", _Py_T_OBJECT, offsetof(PyScannerObject, parse_float), Py_READONLY, "parse_float"}, {"parse_int", _Py_T_OBJECT, offsetof(PyScannerObject, parse_int), Py_READONLY, "parse_int"}, {"parse_constant", _Py_T_OBJECT,
offsetof(PyScannerObject, parse_constant), Py_READONLY, "parse_constant"}, @@ -720,6 +722,7 @@ scanner_traverse(PyObject *op, visitproc visit, void *arg) Py_VISIT(Py_TYPE(self)); Py_VISIT(self->object_hook); Py_VISIT(self->object_pairs_hook); + Py_VISIT(self->array_hook); Py_VISIT(self->parse_float); Py_VISIT(self->parse_int); Py_VISIT(self->parse_constant); @@ -732,6 +735,7 @@ scanner_clear(PyObject *op) PyScannerObject *self = PyScannerObject_CAST(op); Py_CLEAR(self->object_hook); Py_CLEAR(self->object_pairs_hook); + Py_CLEAR(self->array_hook); Py_CLEAR(self->parse_float); Py_CLEAR(self->parse_int); Py_CLEAR(self->parse_constant); @@ -942,6 +946,12 @@ _parse_array_unicode(PyScannerObject *s, PyObject *memo, PyObject *pystr, Py_ssi goto bail; } *next_idx_ptr = idx + 1; + /* if array_hook is not None: return array_hook(rval) */ + if (!Py_IsNone(s->array_hook)) { + val = PyObject_CallOneArg(s->array_hook, rval); + Py_DECREF(rval); + return val; + } return rval; bail: Py_XDECREF(val); @@ -1259,6 +1269,10 @@ scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds) s->object_pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook"); if (s->object_pairs_hook == NULL) goto bail; + s->array_hook = PyObject_GetAttrString(ctx, "array_hook"); + if (s->array_hook == NULL) { + goto bail; + } s->parse_float = PyObject_GetAttrString(ctx, "parse_float"); if (s->parse_float == NULL) goto bail;