From 2a6713610f7d15143b3ba2b2497d9c3f8b487deb Mon Sep 17 00:00:00 2001 From: zhuangjinkai Date: Fri, 20 Mar 2026 02:57:41 +0000 Subject: [PATCH 1/2] =?UTF-8?q?gh-146455:=20Fix=20O(N=C2=B2)=20in=20add=5F?= =?UTF-8?q?const()=20after=20constant=20folding=20moved=20to=20CFG?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The add_const() function in flowgraph.c uses a linear search over the consts list to find the index of a constant. After gh-126835 moved constant folding from the AST optimizer to the CFG optimizer, this function is now called N times for N inner tuple elements during fold_tuple_of_constants(), resulting in O(N²) total time. Fix by maintaining an auxiliary _Py_hashtable_t that maps object pointers to their indices in the consts list, providing O(1) lookup. For a file with 100,000 constant 2-tuples: - Before: 10.38s (add_const occupies 83.76% of CPU time) - After: 1.48s --- Python/flowgraph.c | 139 +++++++++++++++++++++++++++++++-------------- 1 file changed, 96 insertions(+), 43 deletions(-) diff --git a/Python/flowgraph.c b/Python/flowgraph.c index f446a87ee69432..fe825576ca7a5f 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -6,6 +6,7 @@ #include "pycore_intrinsics.h" #include "pycore_pymem.h" // _PyMem_IsPtrFreed() #include "pycore_long.h" // _PY_IS_SMALL_INT() +#include "pycore_hashtable.h" // _Py_hashtable_t #include "pycore_opcode_utils.h" #include "pycore_opcode_metadata.h" // OPCODE_HAS_ARG, etc @@ -1325,30 +1326,43 @@ get_const_value(int opcode, int oparg, PyObject *co_consts) // Steals a reference to newconst. static int -add_const(PyObject *newconst, PyObject *consts, PyObject *const_cache) +add_const(PyObject *newconst, PyObject *consts, PyObject *const_cache, + _Py_hashtable_t *consts_index) { if (_PyCompile_ConstCacheMergeOne(const_cache, &newconst) < 0) { Py_DECREF(newconst); return -1; } - Py_ssize_t index; - for (index = 0; index < PyList_GET_SIZE(consts); index++) { - if (PyList_GET_ITEM(consts, index) == newconst) { - break; - } + /* O(1) lookup via pointer-keyed hashtable (replaces linear search). */ + _Py_hashtable_entry_t *entry = _Py_hashtable_get_entry(consts_index, (void *)newconst); + if (entry != NULL) { + /* Already exists */ + Py_DECREF(newconst); + return (int)(uintptr_t)entry->value; } - if (index == PyList_GET_SIZE(consts)) { - if ((size_t)index >= (size_t)INT_MAX - 1) { - PyErr_SetString(PyExc_OverflowError, "too many constants"); - Py_DECREF(newconst); - return -1; - } - if (PyList_Append(consts, newconst)) { - Py_DECREF(newconst); - return -1; - } + + /* Not found – append to consts list */ + Py_ssize_t index = PyList_GET_SIZE(consts); + if ((size_t)index >= (size_t)INT_MAX - 1) { + PyErr_SetString(PyExc_OverflowError, "too many constants"); + Py_DECREF(newconst); + return -1; + } + if (PyList_Append(consts, newconst)) { + Py_DECREF(newconst); + return -1; } + + /* Update index (must be after successful append) */ + if (_Py_hashtable_set(consts_index, (void *)newconst, (void *)(uintptr_t)index) < 0) { + /* OOM – rollback append for consistency */ + PyList_SetSlice(consts, index, index + 1, NULL); + Py_DECREF(newconst); + PyErr_NoMemory(); + return -1; + } + Py_DECREF(newconst); return (int)index; } @@ -1424,7 +1438,8 @@ maybe_instr_make_load_smallint(cfg_instr *instr, PyObject *newconst, /* Steals reference to "newconst" */ static int instr_make_load_const(cfg_instr *instr, PyObject *newconst, - PyObject *consts, PyObject *const_cache) + PyObject *consts, PyObject *const_cache, + _Py_hashtable_t *consts_index) { int res = maybe_instr_make_load_smallint(instr, newconst, consts, const_cache); if (res < 0) { @@ -1434,7 +1449,7 @@ instr_make_load_const(cfg_instr *instr, PyObject *newconst, if (res > 0) { return SUCCESS; } - int oparg = add_const(newconst, consts, const_cache); + int oparg = add_const(newconst, consts, const_cache, consts_index); RETURN_IF_ERROR(oparg); INSTR_SET_OP1(instr, LOAD_CONST, oparg); return SUCCESS; @@ -1447,7 +1462,8 @@ instr_make_load_const(cfg_instr *instr, PyObject *newconst, Called with codestr pointing to the first LOAD_CONST. */ static int -fold_tuple_of_constants(basicblock *bb, int i, PyObject *consts, PyObject *const_cache) +fold_tuple_of_constants(basicblock *bb, int i, PyObject *consts, + PyObject *const_cache, _Py_hashtable_t *consts_index) { /* Pre-conditions */ assert(PyDict_CheckExact(const_cache)); @@ -1484,7 +1500,7 @@ fold_tuple_of_constants(basicblock *bb, int i, PyObject *consts, PyObject *const } nop_out(const_instrs, seq_size); - return instr_make_load_const(instr, const_tuple, consts, const_cache); + return instr_make_load_const(instr, const_tuple, consts, const_cache, consts_index); } /* Replace: @@ -1502,7 +1518,8 @@ fold_tuple_of_constants(basicblock *bb, int i, PyObject *consts, PyObject *const */ static int fold_constant_intrinsic_list_to_tuple(basicblock *bb, int i, - PyObject *consts, PyObject *const_cache) + PyObject *consts, PyObject *const_cache, + _Py_hashtable_t *consts_index) { assert(PyDict_CheckExact(const_cache)); assert(PyList_CheckExact(consts)); @@ -1554,7 +1571,7 @@ fold_constant_intrinsic_list_to_tuple(basicblock *bb, int i, nop_out(&instr, 1); } assert(consts_found == 0); - return instr_make_load_const(intrinsic, newconst, consts, const_cache); + return instr_make_load_const(intrinsic, newconst, consts, const_cache, consts_index); } if (expect_append) { @@ -1590,7 +1607,8 @@ Optimize lists and sets for: */ static int optimize_lists_and_sets(basicblock *bb, int i, int nextop, - PyObject *consts, PyObject *const_cache) + PyObject *consts, PyObject *const_cache, + _Py_hashtable_t *consts_index) { assert(PyDict_CheckExact(const_cache)); assert(PyList_CheckExact(consts)); @@ -1640,7 +1658,7 @@ optimize_lists_and_sets(basicblock *bb, int i, int nextop, Py_SETREF(const_result, frozenset); } - int index = add_const(const_result, consts, const_cache); + int index = add_const(const_result, consts, const_cache, consts_index); RETURN_IF_ERROR(index); nop_out(const_instrs, seq_size); @@ -1837,7 +1855,8 @@ eval_const_binop(PyObject *left, int op, PyObject *right) } static int -fold_const_binop(basicblock *bb, int i, PyObject *consts, PyObject *const_cache) +fold_const_binop(basicblock *bb, int i, PyObject *consts, + PyObject *const_cache, _Py_hashtable_t *consts_index) { #define BINOP_OPERAND_COUNT 2 assert(PyDict_CheckExact(const_cache)); @@ -1879,7 +1898,7 @@ fold_const_binop(basicblock *bb, int i, PyObject *consts, PyObject *const_cache) } nop_out(operands_instrs, BINOP_OPERAND_COUNT); - return instr_make_load_const(binop, newconst, consts, const_cache); + return instr_make_load_const(binop, newconst, consts, const_cache, consts_index); } static PyObject * @@ -1925,7 +1944,8 @@ eval_const_unaryop(PyObject *operand, int opcode, int oparg) } static int -fold_const_unaryop(basicblock *bb, int i, PyObject *consts, PyObject *const_cache) +fold_const_unaryop(basicblock *bb, int i, PyObject *consts, + PyObject *const_cache, _Py_hashtable_t *consts_index) { #define UNARYOP_OPERAND_COUNT 1 assert(PyDict_CheckExact(const_cache)); @@ -1962,7 +1982,7 @@ fold_const_unaryop(basicblock *bb, int i, PyObject *consts, PyObject *const_cach assert(PyBool_Check(newconst)); } nop_out(&operand_instr, UNARYOP_OPERAND_COUNT); - return instr_make_load_const(unaryop, newconst, consts, const_cache); + return instr_make_load_const(unaryop, newconst, consts, const_cache, consts_index); } #define VISITED (-1) @@ -2157,7 +2177,8 @@ apply_static_swaps(basicblock *block, int i) } static int -basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb, PyObject *consts) +basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb, + PyObject *consts, _Py_hashtable_t *consts_index) { assert(PyDict_CheckExact(const_cache)); assert(PyList_CheckExact(consts)); @@ -2272,7 +2293,7 @@ basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb, PyObject * return ERROR; } cnt = PyBool_FromLong(is_true); - int index = add_const(cnt, consts, const_cache); + int index = add_const(cnt, consts, const_cache, consts_index); if (index < 0) { return ERROR; } @@ -2286,15 +2307,17 @@ basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb, PyObject * } static int -optimize_load_const(PyObject *const_cache, cfg_builder *g, PyObject *consts) { +optimize_load_const(PyObject *const_cache, cfg_builder *g, PyObject *consts, + _Py_hashtable_t *consts_index) { for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { - RETURN_IF_ERROR(basicblock_optimize_load_const(const_cache, b, consts)); + RETURN_IF_ERROR(basicblock_optimize_load_const(const_cache, b, consts, consts_index)); } return SUCCESS; } static int -optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts) +optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts, + _Py_hashtable_t *consts_index) { assert(PyDict_CheckExact(const_cache)); assert(PyList_CheckExact(consts)); @@ -2334,11 +2357,11 @@ optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts) continue; } } - RETURN_IF_ERROR(fold_tuple_of_constants(bb, i, consts, const_cache)); + RETURN_IF_ERROR(fold_tuple_of_constants(bb, i, consts, const_cache, consts_index)); break; case BUILD_LIST: case BUILD_SET: - RETURN_IF_ERROR(optimize_lists_and_sets(bb, i, nextop, consts, const_cache)); + RETURN_IF_ERROR(optimize_lists_and_sets(bb, i, nextop, consts, const_cache, consts_index)); break; case POP_JUMP_IF_NOT_NONE: case POP_JUMP_IF_NONE: @@ -2473,7 +2496,7 @@ optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts) _Py_FALLTHROUGH; case UNARY_INVERT: case UNARY_NEGATIVE: - RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache)); + RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache, consts_index)); break; case CALL_INTRINSIC_1: if (oparg == INTRINSIC_LIST_TO_TUPLE) { @@ -2481,15 +2504,15 @@ optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts) INSTR_SET_OP0(inst, NOP); } else { - RETURN_IF_ERROR(fold_constant_intrinsic_list_to_tuple(bb, i, consts, const_cache)); + RETURN_IF_ERROR(fold_constant_intrinsic_list_to_tuple(bb, i, consts, const_cache, consts_index)); } } else if (oparg == INTRINSIC_UNARY_POSITIVE) { - RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache)); + RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache, consts_index)); } break; case BINARY_OP: - RETURN_IF_ERROR(fold_const_binop(bb, i, consts, const_cache)); + RETURN_IF_ERROR(fold_const_binop(bb, i, consts, const_cache, consts_index)); break; } } @@ -2534,16 +2557,17 @@ remove_redundant_nops_and_jumps(cfg_builder *g) NOPs. Later those NOPs are removed. */ static int -optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache, int firstlineno) +optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache, + _Py_hashtable_t *consts_index, int firstlineno) { assert(PyDict_CheckExact(const_cache)); RETURN_IF_ERROR(check_cfg(g)); RETURN_IF_ERROR(inline_small_or_no_lineno_blocks(g->g_entryblock)); RETURN_IF_ERROR(remove_unreachable(g->g_entryblock)); RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno)); - RETURN_IF_ERROR(optimize_load_const(const_cache, g, consts)); + RETURN_IF_ERROR(optimize_load_const(const_cache, g, consts, consts_index)); for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { - RETURN_IF_ERROR(optimize_basic_block(const_cache, b, consts)); + RETURN_IF_ERROR(optimize_basic_block(const_cache, b, consts, consts_index)); } RETURN_IF_ERROR(remove_redundant_nops_and_pairs(g->g_entryblock)); RETURN_IF_ERROR(remove_unreachable(g->g_entryblock)); @@ -3663,7 +3687,36 @@ _PyCfg_OptimizeCodeUnit(cfg_builder *g, PyObject *consts, PyObject *const_cache, RETURN_IF_ERROR(label_exception_targets(g->g_entryblock)); /** Optimization **/ - RETURN_IF_ERROR(optimize_cfg(g, consts, const_cache, firstlineno)); + + /* Auxiliary pointer→index hashtable for O(1) lookup in add_const. */ + _Py_hashtable_t *consts_index = _Py_hashtable_new( + _Py_hashtable_hash_ptr, _Py_hashtable_compare_direct); + if (consts_index == NULL) { + PyErr_NoMemory(); + return ERROR; + } + + /* Seed the index with pre-existing constants. */ + for (Py_ssize_t i = 0; i < PyList_GET_SIZE(consts); i++) { + PyObject *item = PyList_GET_ITEM(consts, i); + if (_Py_hashtable_get_entry(consts_index, (void *)item) != NULL) { + continue; /* duplicate pointer; keep first occurrence */ + } + if (_Py_hashtable_set(consts_index, (void *)item, + (void *)(uintptr_t)i) < 0) { + _Py_hashtable_destroy(consts_index); + PyErr_NoMemory(); + return ERROR; + } + } + + int ret = optimize_cfg(g, consts, const_cache, consts_index, firstlineno); + + /* consts_index is invalid after this (consts list may be modified). */ + _Py_hashtable_destroy(consts_index); + + RETURN_IF_ERROR(ret); + RETURN_IF_ERROR(remove_unused_consts(g->g_entryblock, consts)); RETURN_IF_ERROR( add_checks_for_loads_of_uninitialized_variables( From cebc9fc9a01910c381744bf8b867f60bc60d3cdf Mon Sep 17 00:00:00 2001 From: zhuangjinkai Date: Thu, 26 Mar 2026 08:49:54 +0000 Subject: [PATCH 2/2] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../2026-03-26-08-49-35.gh-issue-146455.f54083a9.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2026-03-26-08-49-35.gh-issue-146455.f54083a9.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-03-26-08-49-35.gh-issue-146455.f54083a9.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-03-26-08-49-35.gh-issue-146455.f54083a9.rst new file mode 100644 index 00000000000000..4d7537f2529da6 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-03-26-08-49-35.gh-issue-146455.f54083a9.rst @@ -0,0 +1 @@ +Fix O(N²) compile-time regression in constant folding after it was moved from AST to CFG optimizer.