diff --git a/Doc/library/string.rst b/Doc/library/string.rst index 08ccdfa3f454f8..67203cc1ce47a3 100644 --- a/Doc/library/string.rst +++ b/Doc/library/string.rst @@ -403,13 +403,24 @@ following: .. index:: single: z; in string formatting -The ``'z'`` option coerces negative zero floating-point values to positive -zero after rounding to the format precision. This option is only valid for -floating-point presentation types. +For floating-point presentation types the ``'z'`` option coerces negative zero +floating-point values to positive zero after rounding to the format precision. + +For integer presentation types ``'b'``, ``'o'``, ``'x'``, and ``'X'`` formatted +with precision, the ``'z'`` 'modulo-precision' option first reduces the integer +into ``range(base ** precision)``. The result is a predictable two's complement +style formatting with the number of digits *exactly* equal to the precision. +This is especially useful for formatting negative numbers with known bounds +in environments that deal with fixed-width integers, such as :mod:`struct`. + +For other presentation types ``'z'`` is an invalid specifier. .. versionchanged:: 3.11 Added the ``'z'`` option (see also :pep:`682`). +.. versionchanged:: next + Implemented the ``'z'`` specifier for integer fields (see also :pep:`786`). + .. index:: single: # (hash); in string formatting The ``'#'`` option causes the "alternate form" to be used for the @@ -437,13 +448,30 @@ excluding :class:`complex`. This is equivalent to a *fill* character of Preceding the *width* field by ``'0'`` no longer affects the default alignment for strings. -The *precision* is a decimal integer indicating how many digits should be -displayed after the decimal point for presentation types -``'f'`` and ``'F'``, or before and after the decimal point for presentation -types ``'g'`` or ``'G'``. For string presentation types the field -indicates the maximum field size - in other words, how many characters will be -used from the field content. 
The *precision* is not allowed for integer -presentation types. +.. index:: single: precision; in string formatting + +For floating-point presentation types ``'f'`` and ``'F'`` the *precision* +is a decimal integer indicating how many digits should be displayed after +the decimal point. For presentation types ``'g'`` and ``'G'`` the precision +is how many digits should be displayed in total before and after the +decimal point. + +For string presentation types the precision indicates the maximum +field size - in other words, how many characters will be used from the +field content. + +For integer presentation types (excluding ``'c'``), the precision defines the +minimum number of digits to be displayed, with the result padded with leading +zeros if it has fewer digits than the precision specified. +Precision differs from *width*, as only the digits of the number contribute +to the precision count - this is useful when one combines multiple format +specifiers together, and one desires a minimum number of digits, not a +minimum overall string length. The ``'z'`` option can be combined with +precision to format the number to **exactly** the precision number of digits, +truncating the result as necessary. + +.. versionchanged:: next + Implemented the *precision* specifier for integer presentation types. The *grouping* option after *width* and *precision* fields specifies a digit group separator for the integral and fractional parts @@ -793,6 +821,47 @@ Nesting arguments and more complex examples:: 10 A 12 1010 11 B 13 1011 +Comparing the precision and width specifiers:: + + >>> x = 10 + >>> f"{x:#02x}" + '0xa' + >>> # we really wanted 2 digits + >>> f"{x:#.2x}" + '0x0a' + >>> # that's better + >>> + >>> def hexdump(b: bytes) -> str: + ... 
return " ".join(f"{c:#.2x}" for c in b) + >>> + >>> hexdump(b"GET /\r\n\r\n") + '0x47 0x45 0x54 0x20 0x2f 0x0d 0x0a 0x0d 0x0a' + >>> # observe the CR and LF bytes padded to precision 2 + >>> # in this basic HTTP/0.9 request + >>> + >>> def unicode_dump(s: str) -> str: + ... return " ".join(f"U+{ord(c):.4X}" for c in s) + >>> + >>> unicode_dump("USA 🦅") + 'U+0055 U+0053 U+0041 U+0020 U+1F985' + >>> # observe the last character's Unicode code point has 5 digits; + >>> # precision is only the minimum number of digits + +Using the modulo-precision flag:: + + >>> import struct + >>> my_struct = b"\xff" + >>> (t,) = struct.unpack('b', my_struct) # signed char + >>> print(t, f"{t:#.2x}", f"{t:z#.2x}") + -1 -0x01 0xff + >>> (t,) = struct.unpack('B', my_struct) # unsigned char + >>> print(t, f"{t:#.2x}", f"{t:z#.2x}") + 255 0xff 0xff + +Observe that in both the signed and unsigned unpacking the two's complement +formatting mode (``'z'``) produces a predictable, consistent string, suitable +for displaying byte-like output. + .. _template-strings-pep292: diff --git a/Lib/test/test_format.py b/Lib/test/test_format.py index 00f1ab44b0a8fa..f01e315d894735 100644 --- a/Lib/test/test_format.py +++ b/Lib/test/test_format.py @@ -775,9 +775,19 @@ def test_specifier_z_error(self): with self.assertRaisesRegex(ValueError, error_msg): f"{0:fz}" # wrong position - error_msg = re.escape("Negative zero coercion (z) not allowed") + error_msg = re.escape("Two's complement (z) requires precision (.) 
format specifier") + with self.assertRaisesRegex(ValueError, error_msg): + f"{0:zx}" # can't apply two's complement without precision + + error_msg = re.escape("Two's complement (z) only allowed with integer presentation types 'b', 'o', 'x', and 'X'") + with self.assertRaisesRegex(ValueError, error_msg): + f"{0:z.8}" # can't apply to '' int presentation type with self.assertRaisesRegex(ValueError, error_msg): - f"{0:zd}" # can't apply to int presentation type + f"{0:z.8d}" # can't apply to 'd' int presentation type + with self.assertRaisesRegex(ValueError, error_msg): + f"{0:z.8n}" # can't apply to 'n' int presentation type + + error_msg = re.escape("Negative zero coercion (z) not allowed") with self.assertRaisesRegex(ValueError, error_msg): f"{'x':zs}" # can't apply to string diff --git a/Lib/test/test_long.py b/Lib/test/test_long.py index b48a8812a1a2d1..9e93063b1ff79c 100644 --- a/Lib/test/test_long.py +++ b/Lib/test/test_long.py @@ -676,6 +676,16 @@ def test__format__(self): self.assertEqual(format(123456789, ','), '123,456,789') self.assertEqual(format(123456789, '_'), '123_456_789') + ## precision + self.assertEqual(format(0, '.0'), '0') + self.assertEqual(format(67, '2.3'), '067') + self.assertEqual(format(67, '8.3'), ' 067') + self.assertEqual(format(67, ' .3'), ' 067') + + with self.assertRaises(ValueError): + format(123, ".2147483648") + # too large + # sign and aligning are interdependent self.assertEqual(format(1, "-"), '1') self.assertEqual(format(-1, "-"), '-1') @@ -707,6 +717,23 @@ def test__format__(self): self.assertEqual(format(1234567890, '_x'), '4996_02d2') self.assertEqual(format(1234567890, '_X'), '4996_02D2') + ## precision + self.assertEqual(format(10, '#.2x'), '0x0a') + self.assertEqual(format(21, '#.4x'), '0x0015') + self.assertEqual(format(8086, '#.4x'), '0x1f96') + self.assertEqual(format(314159, '#.4x'), '0x4cb2f') + self.assertEqual(format(-21, '#.4x'), '-0x0015') + self.assertEqual(format(-8086, '#.4x'), '-0x1f96') + 
self.assertEqual(format(-314159, '#.4x'), '-0x4cb2f') + + ## modulo / two's complement wrap + self.assertEqual(format(-129, 'z#.2x'), '0x7f') + self.assertEqual(format(127, 'z#.2x'), '0x7f') + self.assertEqual(format(383, 'z#.2x'), '0x7f') + self.assertEqual(format(-1, 'z#.2x'), '0xff') + self.assertEqual(format(255, 'z#.2x'), '0xff') + self.assertEqual(format(10, 'z#.2x'), '0x0a') + # octal self.assertEqual(format(3, "o"), "3") self.assertEqual(format(-3, "o"), "-3") @@ -721,6 +748,18 @@ def test__format__(self): self.assertRaises(ValueError, format, 1234567890, ',o') self.assertEqual(format(1234567890, '_o'), '111_4540_1322') + ## precision + self.assertEqual(format(18, '#.3o'), '0o022') + self.assertEqual(format(256, 'z.3o'), '400') + self.assertEqual(format(64, 'z.2o'), '00') + self.assertEqual(format(-257, 'z.3o'), '377') + + self.assertEqual(format(-1, 'z.3o'), '777') + # different from C `printf("%hho\n", -1);` which is '377' + # because hh is 8 bits, not divisible by 3, the number of + # bits per octal digit, whereas Python has unlimited precision + # and we request 9 bits (3 bits per octal digit * 3 digits) + # binary self.assertEqual(format(3, "b"), "11") self.assertEqual(format(-3, "b"), "-11") @@ -735,12 +774,36 @@ def test__format__(self): self.assertRaises(ValueError, format, 1234567890, ',b') self.assertEqual(format(12345, '_b'), '11_0000_0011_1001') - # make sure these are errors - self.assertRaises(ValueError, format, 3, "1.3") # precision disallowed + ## precision + self.assertEqual(format( 1, '#.8b'), "0b00000001") + self.assertEqual(format(-1, '#.8b'), "-0b00000001") + self.assertEqual(format(127, '#.8b'), "0b01111111") + self.assertEqual(format(128, '#.8b'), "0b10000000") + self.assertEqual(format(129, '#.8b'), "0b10000001") + self.assertEqual(format(301, '#.8b'), "0b100101101") + self.assertEqual(format( 15, ' #.8b'), " 0b00001111") + self.assertEqual(format( 15, '+#.8b'), "+0b00001111") + self.assertEqual(format(-15, ' #.8b'), "-0b00001111") 
+ + ## modulo / two's complement wrap + self.assertEqual(format(-129, 'z#.8b'), '0b01111111') + self.assertEqual(format(127, 'z#.8b'), '0b01111111') + self.assertEqual(format(383, 'z#.8b'), '0b01111111') + self.assertEqual(format(-1, 'z#.8b'), '0b11111111') + self.assertEqual(format(255, 'z#.8b'), '0b11111111') + self.assertEqual(format(10, 'z#.8b'), '0b00001010') + self.assertEqual(format(600, ' z#8.8b'), ' 0b01011000') + self.assertEqual(format(600, ' z#12.8b'), ' 0b01011000') + + # make sure these are errors for 'c' presentation type + self.assertRaises(ValueError, format, 3, ".3c") # precision, + self.assertRaises(ValueError, format, 3, "zc") # two's complement, + self.assertRaises(ValueError, format, 3, "#c") # alternate, self.assertRaises(ValueError, format, 3, "_c") # underscore, - self.assertRaises(ValueError, format, 3, ",c") # comma, and - self.assertRaises(ValueError, format, 3, "+c") # sign not allowed - # with 'c' + self.assertRaises(ValueError, format, 3, ",c") # comma, + self.assertRaises(ValueError, format, 3, "+c") # + sign + self.assertRaises(ValueError, format, 3, "-c") # - sign + self.assertRaises(ValueError, format, 3, " c") # ' ' sign self.assertRaisesRegex(ValueError, 'Cannot specify both', format, 3, '_,') self.assertRaisesRegex(ValueError, 'Cannot specify both', format, 3, ',_') diff --git a/Lib/test/test_types.py b/Lib/test/test_types.py index 2084b30d71ff6c..a6f5ca3b046c69 100644 --- a/Lib/test/test_types.py +++ b/Lib/test/test_types.py @@ -387,8 +387,6 @@ def test(i, format_spec, result): # make sure these are errors - # precision disallowed - self.assertRaises(ValueError, 3 .__format__, "1.3") # sign not allowed with 'c' self.assertRaises(ValueError, 3 .__format__, "+c") # format spec must be string diff --git a/Misc/NEWS.d/next/Library/2026-03-25-21-10-53.gh-issue-146436.YSTTZK.rst b/Misc/NEWS.d/next/Library/2026-03-25-21-10-53.gh-issue-146436.YSTTZK.rst new file mode 100644 index 00000000000000..21cef43208f652 --- /dev/null +++ 
b/Misc/NEWS.d/next/Library/2026-03-25-21-10-53.gh-issue-146436.YSTTZK.rst @@ -0,0 +1,4 @@ +Support precision format specifier (.) for integer fields in string formatting. +Support two's complement format specifier (z) for integer fields formatted +with precision and binary, octal, or hexadecimal presentation type. +Patch by Jay Berry. diff --git a/Objects/unicode_formatter.c b/Objects/unicode_formatter.c index b8604d1355940a..aef29ef13fbc32 100644 --- a/Objects/unicode_formatter.c +++ b/Objects/unicode_formatter.c @@ -1214,34 +1214,34 @@ format_long_internal(PyObject *value, const InternalFormatSpec *format, from a hard-code pseudo-locale */ LocaleInfo locale = LocaleInfo_STATIC_INIT; - /* no precision allowed on integers */ - if (format->precision != -1) { - PyErr_SetString(PyExc_ValueError, - "Precision not allowed in integer format specifier"); - goto done; - } - /* no negative zero coercion on integers */ - if (format->no_neg_0) { - PyErr_SetString(PyExc_ValueError, - "Negative zero coercion (z) not allowed in integer" - " format specifier"); - goto done; - } - /* special case for character formatting */ if (format->type == 'c') { /* error to specify a sign */ if (format->sign != '\0') { PyErr_SetString(PyExc_ValueError, "Sign not allowed with integer" - " format specifier 'c'"); + " presentation type 'c'"); goto done; } /* error to request alternate format */ if (format->alternate) { PyErr_SetString(PyExc_ValueError, "Alternate form (#) not allowed with integer" - " format specifier 'c'"); + " presentation type 'c'"); + goto done; + } + /* error to request precision */ + if (format->precision != -1) { + PyErr_SetString(PyExc_ValueError, + "Precision (.) 
not allowed with integer" + " presentation type 'c'"); + goto done; + } + /* error to request two's complement */ + if (format->no_neg_0) { + PyErr_SetString(PyExc_ValueError, + "Two's complement (z) not allowed with integer" + " presentation type 'c'"); goto done; } @@ -1269,35 +1269,80 @@ format_long_internal(PyObject *value, const InternalFormatSpec *format, } else { int base; - int leading_chars_to_skip = 0; /* Number of characters added by - PyNumber_ToBase that we want to - skip over. */ + int log2_base; + int leading_chars_to_skip; /* Number of characters added by + PyNumber_ToBase that we want to + skip over. */ + + if (format->precision == -1) { + /* precision not requested */ + + if (format->no_neg_0) { + /* two's complement format only allowed with precision */ + PyErr_SetString(PyExc_ValueError, + "Two's complement (z) requires precision (.)" + " format specifier"); + goto done; + } + } else { + /* precision requested */ + + if (format->no_neg_0 && !(format->type == 'b' || format->type == 'o' + || format->type == 'x' || format->type == 'X')) + { + /* two's complement format only allowed for bases that + are powers of two */ + + /* It is easier to specify which bases are allowed than + to single out 'c', 'd', 'n', and '' as forbidden, + because with '' (which is implemented the same as 'n') + the error message would have to take into account + whether the user literally specified 'n' or '' */ + PyErr_SetString(PyExc_ValueError, + "Two's complement (z) only allowed" + " with integer presentation types" + " 'b', 'o', 'x', and 'X'"); + goto done; + } + + /* finally check the precision length is sane */ + if (format->precision > INT_MAX) { + PyErr_SetString(PyExc_ValueError, "precision too big"); + goto done; + } + } /* Compute the base and how many characters will be added by PyNumber_ToBase */ switch (format->type) { case 'b': base = 2; + log2_base = 1; leading_chars_to_skip = 2; /* 0b */ break; case 'o': base = 8; + log2_base = 3; leading_chars_to_skip = 
2; /* 0o */ break; case 'x': case 'X': base = 16; + log2_base = 4; leading_chars_to_skip = 2; /* 0x */ break; default: /* shouldn't be needed, but stops a compiler warning */ case 'd': case 'n': base = 10; + log2_base = -1; /* unused */ + leading_chars_to_skip = 0; break; } if (format->sign != '+' && format->sign != ' ' && format->width == -1 + && format->precision == -1 && format->type != 'X' && format->type != 'n' && !format->thousands_separators && PyLong_CheckExact(value)) @@ -1311,8 +1356,40 @@ format_long_internal(PyObject *value, const InternalFormatSpec *format, if (format->alternate) n_prefix = leading_chars_to_skip; - /* Do the hard part, converting to a string in a given base */ - tmp = _PyLong_Format(value, base); + if (format->no_neg_0) { + /* perform the reduction of value modulo (base ** precision) */ + int64_t shift_by; + PyObject *one; + PyObject *modulus; + PyObject *reduced_value; + + shift_by = log2_base * format->precision; + + one = PyLong_FromLong(1); + if (one == NULL) { + goto done; + } + + modulus = _PyLong_Lshift(one, shift_by); + Py_DECREF(one); + if (modulus == NULL) { + goto done; + } + + reduced_value = PyNumber_Remainder(value, modulus); + Py_DECREF(modulus); + if (reduced_value == NULL) { + goto done; + } + + /* Do the hard part, converting to a string in a given base */ + tmp = _PyLong_Format(reduced_value, base); + Py_DECREF(reduced_value); + } else { + /* Do the hard part, converting to a string in a given base */ + tmp = _PyLong_Format(value, base); + } + if (tmp == NULL) goto done; @@ -1332,6 +1409,41 @@ format_long_internal(PyObject *value, const InternalFormatSpec *format, /* Skip over the leading chars (0x, 0b, etc.) 
*/ n_digits -= leading_chars_to_skip; inumeric_chars += leading_chars_to_skip; + + if (format->precision != -1 && n_digits < format->precision) { + /* prepend leading zeros (after sign and prefix if they exist) */ + PyObject *tmp2; + + Py_ssize_t zeros_needed = format->precision - n_digits; + Py_ssize_t tmp2_len = leading_chars_to_skip + format->precision; + + tmp2 = PyUnicode_New(tmp2_len, 127); + if (tmp2 == NULL) + goto done; + + if (PyUnicode_CopyCharacters(tmp2, 0, tmp, 0, + leading_chars_to_skip) < 0) + { + Py_DECREF(tmp2); + goto done; + } + if (PyUnicode_Fill(tmp2, leading_chars_to_skip, + zeros_needed, '0') < 0) + { + Py_DECREF(tmp2); + goto done; + } + if (PyUnicode_CopyCharacters(tmp2, + leading_chars_to_skip + zeros_needed, + tmp, leading_chars_to_skip, n_digits + ) < 0) + { + Py_DECREF(tmp2); + goto done; + } + Py_SETREF(tmp, tmp2); + n_digits = format->precision; + } } /* Determine the grouping, separator, and decimal point, if any. */