Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
84 commits
Select commit Hold shift + click to select a range
776a1e1
Assign threads indices into bytecode copies
mpage Aug 15, 2024
2b40870
Replace most usage of PyCode_CODE
mpage Aug 27, 2024
344d7ad
Get bytecode copying working
mpage Aug 20, 2024
f203d00
Refactor remove_tools
mpage Aug 30, 2024
82b456a
Refactor remove_line_tools
mpage Aug 30, 2024
b021704
Instrument thread-local bytecode
mpage Sep 1, 2024
aea69c5
Use locks for instrumentation
mpage Sep 3, 2024
552277d
Add ifdef guards for each specialization family
mpage Sep 3, 2024
50a6089
Specialize BINARY_OP
mpage Sep 4, 2024
3f1d941
Limit the amount of memory consumed by bytecode copies
mpage Sep 6, 2024
7d2eb27
Make thread-local bytecode limits user configurable
mpage Sep 7, 2024
d5476b9
Fix a few data races when (de)instrumenting opcodes
mpage Sep 8, 2024
e3b367a
Make branch taken recording thread-safe
mpage Sep 8, 2024
b2375bf
Lock thread-local bytecode when specializing
mpage Sep 9, 2024
2707f8e
Load bytecode on RESUME_CHECK
mpage Sep 9, 2024
3fdcb28
Load tlbc on generator.throw()
mpage Sep 9, 2024
4a55ce5
Use tlbc instead of thread_local_bytecode
mpage Sep 9, 2024
8b3ff60
Use tlbc everywhere
mpage Sep 9, 2024
862afa1
Explicitly manage tlbc state
mpage Sep 9, 2024
0b4d952
Refactor API for fetching tlbc
mpage Sep 9, 2024
7795e99
Add unit tests
mpage Sep 10, 2024
693a4cc
Fix initconfig in default build
mpage Sep 10, 2024
b43531e
Fix instrumentation in default build
mpage Sep 10, 2024
9025f43
Synchronize bytecode modifications between specialization and instrum…
mpage Sep 10, 2024
c44c7d9
Add a high-level comment
mpage Sep 10, 2024
e2a6656
Fix unused variable warning in default build
mpage Sep 10, 2024
e6513d1
Fix test_config in free-threaded builds
mpage Sep 10, 2024
a18396f
Fix formatting
mpage Sep 10, 2024
81fe1a2
Remove comment
mpage Sep 10, 2024
837645e
Fix data race in _PyInstruction_GetLength
mpage Sep 10, 2024
f13e132
Fix tier2 optimizer
mpage Sep 11, 2024
942f628
Use __VA_ARGS__ for macros
mpage Sep 11, 2024
66cb24d
Update vcxproj files to include newly added files
mpage Sep 11, 2024
ad12bd4
Mark unused params
mpage Sep 11, 2024
1bbbbbc
Keep tier2 and the JIT disabled in free-threaded builds
mpage Sep 12, 2024
e63e403
Only allow enabling/disabling tlbc
mpage Sep 13, 2024
8b97771
Update libpython for gdb
mpage Sep 13, 2024
d34adeb
Merge branch 'main' into gh-115999-thread-local-bytecode
mpage Sep 13, 2024
6d4fe73
Handle out of memory errors
mpage Sep 13, 2024
c2d8693
Merge branch 'main' into gh-115999-thread-local-bytecode
mpage Sep 17, 2024
b104782
Fix warnings on windows
mpage Sep 17, 2024
deb5216
Fix another warning
mpage Sep 18, 2024
2f11cc7
Ugh actually fix it
mpage Sep 18, 2024
04f1ac3
Add high-level comment about index pools
mpage Sep 25, 2024
aa330b1
Merge branch 'main' into gh-115999-thread-local-bytecode
mpage Sep 25, 2024
7dfd1ca
Merge branch 'main' into gh-115999-thread-local-bytecode
mpage Sep 26, 2024
7c9da24
Exclude tlbc from refleak counts
mpage Sep 27, 2024
dd144d0
Merge branch 'main' into gh-115999-thread-local-bytecode
mpage Sep 28, 2024
ad180d1
Regen files
mpage Sep 28, 2024
95d2264
Move `get_tlbc_blocks` into the sys module
mpage Sep 30, 2024
b6380de
Merge branch 'main' into gh-115999-thread-local-bytecode
mpage Sep 30, 2024
adb59ef
Merge branch 'main' into gh-115999-thread-local-bytecode
mpage Oct 5, 2024
39c947d
Merge branch 'main' into gh-115999-thread-local-bytecode
mpage Oct 10, 2024
2cc5830
Work around `this_instr` now being const
mpage Oct 11, 2024
96ec126
Make RESUME_CHECK cheaper
mpage Oct 11, 2024
5ecebd9
Pass tstate to _PyCode_GetTLBCFast
mpage Oct 11, 2024
815b2fe
Rename test_tlbc.py to test_thread_local_bytecode.py
mpage Oct 11, 2024
fb90d23
Remove per-family defines for specialization
mpage Oct 11, 2024
4e42414
Replace bytecode pointer with tlbc_index
mpage Oct 13, 2024
814e4ca
Add a test verifying that we clean up tlbc when the code object is de…
mpage Oct 14, 2024
ba3930a
Merge branch 'main' into gh-115999-thread-local-bytecode
mpage Oct 14, 2024
cb8a774
Fix indentation
mpage Oct 14, 2024
0f8a55b
Clarify comment
mpage Oct 14, 2024
70ce0fe
Fix TSAN
mpage Oct 14, 2024
f512353
Add test for cleaning up tlbc in correct place, not old emacs buffer
mpage Oct 14, 2024
4be2b1f
Remove test_tlbc.py
mpage Oct 14, 2024
61c7aa9
Merge branch 'main' into gh-115999-thread-local-bytecode
mpage Oct 17, 2024
ab6222c
Use int32_t instead of Py_ssize_t for tlbc indices
mpage Oct 17, 2024
6bbb220
Use _PyCode_CODE instead of PyFrame_GetBytecode in super_init_without…
mpage Oct 17, 2024
4580e3c
Update comment
mpage Oct 17, 2024
b992f44
Consolidate _PyCode_{Quicken,DisableSpecialization} into _PyCode_Init…
mpage Oct 17, 2024
4c040d3
Merge branch 'main' into gh-115999-thread-local-bytecode
mpage Oct 18, 2024
5b7658c
Fix incorrect types
mpage Oct 18, 2024
bec5bce
Add command-line tests for enabling TLBC
mpage Oct 18, 2024
c9054b7
Update libpython.py for tlbc_index
mpage Oct 18, 2024
1a48ab2
Avoid special casing in _PyEval_GetExecutableCode
mpage Oct 19, 2024
b16ae5f
Merge branch 'main' into gh-115999-thread-local-bytecode
mpage Oct 19, 2024
176b24e
Merge branch 'main' into gh-115999-thread-local-bytecode
mpage Oct 23, 2024
c107495
Clear TLBC when other caches are cleared
mpage Oct 23, 2024
07f9140
Remove _get_tlbc_blocks
mpage Oct 24, 2024
4cbe237
Merge branch 'main' into gh-115999-thread-local-bytecode
mpage Oct 30, 2024
38ff315
Rename _PyCode_InitCounters back to _PyCode_Quicken
mpage Oct 30, 2024
338f7e5
Merge branch 'main' into gh-115999-thread-local-bytecode
mpage Nov 4, 2024
bcd1bb2
Merge branch 'main' into gh-115999-thread-local-bytecode
mpage Nov 4, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Fix formatting
  • Loading branch information
mpage committed Sep 10, 2024
commit a18396fa60ea54b04e3a375c951fa27e0288f31a
3 changes: 2 additions & 1 deletion Include/internal/pycore_code.h
Original file line number Diff line number Diff line change
Expand Up @@ -639,7 +639,8 @@ typedef enum {
_PY_TLBC_UNLIMITED = 0,

// The total amount of memory consumed by thread-local bytecode must be
// <= PyInterpreterState::tlbc_limit. State transitions to _PY_TLBC_DISABLED
// <= PyInterpreterState::tlbc_limit. State transitions to
// _PY_TLBC_DISABLED
// when the limit is reached.
_PY_TLBC_LIMITED = 1,

Expand Down
7 changes: 5 additions & 2 deletions Include/internal/pycore_frame.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,9 @@ static inline PyCodeObject *_PyFrame_GetCode(_PyInterpreterFrame *f) {
return (PyCodeObject *)f->f_executable;
}

static inline _Py_CODEUNIT *_PyFrame_GetBytecode(_PyInterpreterFrame *f) {
static inline _Py_CODEUNIT *
_PyFrame_GetBytecode(_PyInterpreterFrame *f)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You were storing the bytecode in the frame directly before, IIRC.
This looks more expensive, and is used on at least one fast path:
https://github.com/python/cpython/pull/123926/files#diff-729a985b0cb8b431cb291f1edb561bbbfea22e3f8c262451cd83328a0936a342R4821

Does it make things faster overall, or is it just more compact?

Copy link
Copy Markdown
Contributor Author

@mpage mpage Oct 17, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You were storing the bytecode in the frame directly before, IIRC.

Yep. You suggested storing tlbc_index instead since it was smaller. I think this is better for a couple of reasons:

  1. It's smaller, as you said.
  2. It simplifies and speeds up the implementation of RESUME_CHECK. Previously, we would have to load the bytecode pointer for the current thread and deopt if it didn't match what was in the frame. Now we only have to compare tlbc indices. This is a cost shift, however, since now the callers of _PyFrame_GetBytecode have to do the more expensive load of the bytecode. I think the size reduction + simplification of RESUME_CHECK probably outweighs the higher cost of _PyFrame_GetBytecode. tier2 is also still disabled in free-threaded builds, so it's a bit hard to evaluate the relative cost of slower trace exits vs faster RESUME_CHECKs. Once we get tier2 enabled we can reevaluate.

{
#ifdef Py_GIL_DISABLED
return f->bytecode;
#else
Expand Down Expand Up @@ -228,7 +230,8 @@ _PyFrame_IsIncomplete(_PyInterpreterFrame *frame)
return true;
}
return frame->owner != FRAME_OWNED_BY_GENERATOR &&
frame->instr_ptr < _PyFrame_GetBytecode(frame) + _PyFrame_GetCode(frame)->_co_firsttraceable;
frame->instr_ptr < _PyFrame_GetBytecode(frame) +
_PyFrame_GetCode(frame)->_co_firsttraceable;
}

static inline _PyInterpreterFrame *
Expand Down
3 changes: 2 additions & 1 deletion Include/internal/pycore_interp.h
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,8 @@ struct _is {
struct _Py_type_id_pool type_ids;
PyMutex weakref_locks[NUM_WEAKREF_LIST_LOCKS];
_PyIndexPool tlbc_indices;
// Number of bytes available for thread-local bytecode, counts down to zero.
// Number of bytes available for thread-local bytecode, counts down to
// zero.
Py_ssize_t tlbc_avail;
PyMutex tlbc_avail_mutex;
_Py_TLBC_State tlbc_state;
Expand Down
1 change: 0 additions & 1 deletion Modules/_opcode.c
Original file line number Diff line number Diff line change
Expand Up @@ -417,7 +417,6 @@ opcode_functions[] = {
{NULL, NULL, 0, NULL}
};


static int
_opcode_exec(PyObject *m) {
#define ADD(X) \
Expand Down
12 changes: 6 additions & 6 deletions Modules/_testinternalcapi.c
Original file line number Diff line number Diff line change
Expand Up @@ -1969,13 +1969,14 @@ static PyCodeObject *
get_code(PyObject *obj)
{
if (PyCode_Check(obj)) {
return (PyCodeObject *) obj;
return (PyCodeObject *)obj;
}
else if (PyFunction_Check(obj)) {
return (PyCodeObject *) PyFunction_GetCode(obj);
return (PyCodeObject *)PyFunction_GetCode(obj);
}
return (PyCodeObject *) PyErr_Format(PyExc_TypeError,
"expected function or code object, got %s", Py_TYPE(obj)->tp_name);
return (PyCodeObject *)PyErr_Format(
PyExc_TypeError, "expected function or code object, got %s",
Py_TYPE(obj)->tp_name);
}

static PyObject *
Expand All @@ -1989,7 +1990,7 @@ get_tlbc(PyObject *Py_UNUSED(module), PyObject *obj)
if (bc == NULL) {
Py_RETURN_NONE;
}
return PyBytes_FromStringAndSize((const char *) bc, _PyCode_NBYTES(code));
return PyBytes_FromStringAndSize((const char *)bc, _PyCode_NBYTES(code));
}

static PyObject *
Expand Down Expand Up @@ -2090,7 +2091,6 @@ identify_type_slot_wrappers(PyObject *self, PyObject *Py_UNUSED(ignored))
return _PyType_GetSlotWrapperNames();
}


static PyMethodDef module_functions[] = {
{"get_configs", get_configs, METH_NOARGS},
{"get_recursion_depth", get_recursion_depth, METH_NOARGS},
Expand Down
68 changes: 36 additions & 32 deletions Objects/codeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -2699,14 +2699,15 @@ _PyCode_Fini(PyInterpreterState *interp)
// The total amount of memory consumed by thread-local bytecode can be limited
// at runtime by setting either `-X tlbc_limit` or `PYTHON_TLBC_LIMIT`. When
// the limit is reached, no new copies of thread-local bytecode can be created
// and specialization is disabled for the "main" copy of the bytecode (the bytecode
// at index 0 of the `co_tlbc` array). Threads can continue to specialize
// existing thread-local copies of the bytecode (other than the "main" copy).
// All other execution will use the unspecialized, "main" copy of the bytecode.
// and specialization is disabled for the "main" copy of the bytecode (the
// bytecode at index 0 of the `co_tlbc` array). Threads can continue to
// specialize existing thread-local copies of the bytecode (other than the
// "main" copy). All other execution will use the unspecialized, "main" copy of
// the bytecode.
//
// Concurrent modifications to the bytecode made by the specializing interpreter
// and instrumentation use atomics, with specialization taking care not to
// overwrite an instruction that was instrumented concurrently.
// Concurrent modifications to the bytecode made by the specializing
// interpreter and instrumentation use atomics, with specialization taking care
// not to overwrite an instruction that was instrumented concurrently.

void
_PyCode_InitState(PyInterpreterState *interp)
Expand Down Expand Up @@ -2735,14 +2736,15 @@ _Py_ReserveTLBCIndex(PyInterpreterState *interp)
void
_Py_ClearTLBCIndex(_PyThreadStateImpl *tstate)
{
PyInterpreterState *interp = ((PyThreadState*) tstate)->interp;
PyInterpreterState *interp = ((PyThreadState *)tstate)->interp;
_PyIndexPool_FreeIndex(&interp->tlbc_indices, tstate->tlbc_index);
}

static _PyCodeArray *
_PyCodeArray_New(Py_ssize_t size)
{
_PyCodeArray *arr = PyMem_Calloc(1, sizeof(_PyCodeArray) + sizeof(void*) * size);
_PyCodeArray *arr =
PyMem_Calloc(1, sizeof(_PyCodeArray) + sizeof(void *) * size);
if (arr == NULL) {
PyErr_NoMemory();
return NULL;
Expand Down Expand Up @@ -2787,7 +2789,7 @@ create_tlbc_lock_held(PyCodeObject *co, Py_ssize_t idx)
if (new_tlbc == NULL) {
return NULL;
}
memcpy(new_tlbc->entries, tlbc->entries, tlbc->size * sizeof(void*));
memcpy(new_tlbc->entries, tlbc->entries, tlbc->size * sizeof(void *));
_Py_atomic_store_ptr_release(&co->co_tlbc, new_tlbc);
_PyMem_FreeDelayed(tlbc);
tlbc = new_tlbc;
Expand All @@ -2811,27 +2813,27 @@ reserve_bytes_for_tlbc(PyCodeObject *co)
PyMutex_LockFlags(&interp->tlbc_avail_mutex, _Py_LOCK_DONT_DETACH);
Py_ssize_t nbytes_reserved;
switch (interp->tlbc_state) {
case _PY_TLBC_UNLIMITED: {
nbytes_reserved = code_size;
break;
}
case _PY_TLBC_LIMITED: {
if (interp->tlbc_avail >= code_size) {
case _PY_TLBC_UNLIMITED: {
nbytes_reserved = code_size;
interp->tlbc_avail -= code_size;
break;
}
else {
case _PY_TLBC_LIMITED: {
if (interp->tlbc_avail >= code_size) {
nbytes_reserved = code_size;
interp->tlbc_avail -= code_size;
}
else {
nbytes_reserved = -1;
}
break;
}
case _PY_TLBC_DISABLED: {
nbytes_reserved = -1;
break;
}
default: {
Py_UNREACHABLE();
}
break;
}
case _PY_TLBC_DISABLED: {
nbytes_reserved = -1;
break;
}
default: {
Py_UNREACHABLE();
}
}
PyMutex_Unlock(&interp->tlbc_avail_mutex);
return nbytes_reserved;
Expand All @@ -2853,12 +2855,12 @@ release_bytes_for_tlbc(Py_ssize_t nbytes)
}

static int
disable_specialization(PyObject *obj, void*)
disable_specialization(PyObject *obj, void *)
{
if (!PyCode_Check(obj)) {
return 1;
}
PyCodeObject *co = (PyCodeObject *) obj;
PyCodeObject *co = (PyCodeObject *)obj;
_PyCode_DisableSpecialization(_PyCode_CODE(co), Py_SIZE(co));
return 1;
}
Expand All @@ -2881,7 +2883,9 @@ disable_new_tlbc(void)
interp->tlbc_state = _PY_TLBC_DISABLED;
_PyEval_StartTheWorld(interp);
PyUnstable_GC_VisitObjects(disable_specialization, NULL);
if (PyErr_WarnEx(PyExc_ResourceWarning, "Reached memory limit for thread-local bytecode", 1) < 0) {
if (PyErr_WarnEx(PyExc_ResourceWarning,
"Reached memory limit for thread-local bytecode",
1) < 0) {
PyErr_WriteUnraisable(NULL);
}
}
Expand All @@ -2890,10 +2894,10 @@ static _Py_CODEUNIT *
get_tlbc_lock_held(PyCodeObject *co)
{
_PyCodeArray *tlbc = co->co_tlbc;
_PyThreadStateImpl *tstate = (_PyThreadStateImpl *) PyThreadState_GET();
_PyThreadStateImpl *tstate = (_PyThreadStateImpl *)PyThreadState_GET();
Py_ssize_t idx = tstate->tlbc_index;
if (idx < tlbc->size && tlbc->entries[idx] != NULL) {
return (_Py_CODEUNIT *) tlbc->entries[idx];
return (_Py_CODEUNIT *)tlbc->entries[idx];
}
Py_ssize_t reserved = reserve_bytes_for_tlbc(co);
if (reserved == -1) {
Expand Down
6 changes: 3 additions & 3 deletions Objects/frameobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -1865,9 +1865,9 @@ frame_init_get_vars(_PyInterpreterFrame *frame)
// here:
PyCodeObject *co = _PyFrame_GetCode(frame);
int lasti = _PyInterpreterFrame_LASTI(frame);
if (!(lasti < 0 && _PyFrame_GetBytecode(frame)->op.code == COPY_FREE_VARS
&& PyFunction_Check(frame->f_funcobj)))
{
if (!(lasti < 0 &&
_PyFrame_GetBytecode(frame)->op.code == COPY_FREE_VARS &&
PyFunction_Check(frame->f_funcobj))) {
/* Free vars are initialized */
return;
}
Expand Down
4 changes: 2 additions & 2 deletions Objects/typeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -11547,8 +11547,8 @@ super_descr_get(PyObject *self, PyObject *obj, PyObject *type)
}

static int
super_init_without_args(_PyInterpreterFrame *cframe,
PyTypeObject **type_p, PyObject **obj_p)
super_init_without_args(_PyInterpreterFrame *cframe, PyTypeObject **type_p,
PyObject **obj_p)
{
PyCodeObject *co = _PyFrame_GetCode(cframe);
if (co->co_argcount == 0) {
Expand Down
3 changes: 2 additions & 1 deletion Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -810,7 +810,8 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
* we need to update instrumentation */
#ifdef Py_GIL_DISABLED
/* Load thread-local bytecode */
_Py_CODEUNIT *bytecode = _PyCode_GetExecutableCode(_PyFrame_GetCode(frame));
_Py_CODEUNIT *bytecode =
_PyCode_GetExecutableCode(_PyFrame_GetCode(frame));
if (frame->bytecode != bytecode) {
int off = frame->instr_ptr - frame->bytecode;
frame->bytecode = bytecode;
Expand Down
6 changes: 3 additions & 3 deletions Python/ceval_macros.h
Original file line number Diff line number Diff line change
Expand Up @@ -315,9 +315,9 @@ GETITEM(PyObject *v, Py_ssize_t i) {
* limit is reached and they all execute the main copy of the bytecode. This is
Comment thread
mpage marked this conversation as resolved.
Outdated
* approximate, we do not need the RMW cycle to be atomic.
*/
#define RECORD_BRANCH_TAKEN(bitset, flag) \
FT_ATOMIC_STORE_UINT16_RELAXED(bitset, \
(FT_ATOMIC_LOAD_UINT16_RELAXED(bitset) << 1) | (flag))
#define RECORD_BRANCH_TAKEN(bitset, flag) \
FT_ATOMIC_STORE_UINT16_RELAXED( \
bitset, (FT_ATOMIC_LOAD_UINT16_RELAXED(bitset) << 1) | (flag))
#else
#define RECORD_BRANCH_TAKEN(bitset, flag)
Comment thread
Yhg1s marked this conversation as resolved.
#endif
Expand Down
3 changes: 2 additions & 1 deletion Python/frame.c
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@ take_ownership(PyFrameObject *f, _PyInterpreterFrame *frame)
// This may be a newly-created generator or coroutine frame. Since it's
// dead anyways, just pretend that the first RESUME ran:
PyCodeObject *code = _PyFrame_GetCode(frame);
frame->instr_ptr = _PyFrame_GetBytecode(frame) + code->_co_firsttraceable + 1;
frame->instr_ptr =
_PyFrame_GetBytecode(frame) + code->_co_firsttraceable + 1;
}
assert(!_PyFrame_IsIncomplete(frame));
assert(f->f_back == NULL);
Expand Down
4 changes: 2 additions & 2 deletions Python/index_pool.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,13 @@ parent(Py_ssize_t i)
static inline Py_ssize_t
left_child(Py_ssize_t i)
{
return 2*i + 1;
return 2 * i + 1;
}

static inline Py_ssize_t
right_child(Py_ssize_t i)
{
return 2*i + 2;
return 2 * i + 2;
}

static void
Expand Down
35 changes: 19 additions & 16 deletions Python/instrumentation.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,16 +46,16 @@

#define UNLOCK_CODE() Py_END_CRITICAL_SECTION()

#define MODIFY_BYTECODE(code, func, args...) \
do { \
PyCodeObject *co = (code); \
for (Py_ssize_t i = 0; i < code->co_tlbc->size; i++) { \
char *bc = co->co_tlbc->entries[i]; \
if (bc == NULL) { \
continue; \
} \
(func)((_Py_CODEUNIT *) bc, args); \
} \
#define MODIFY_BYTECODE(code, func, args...) \
do { \
PyCodeObject *co = (code); \
for (Py_ssize_t i = 0; i < code->co_tlbc->size; i++) { \
char *bc = co->co_tlbc->entries[i]; \
if (bc == NULL) { \
continue; \
} \
(func)((_Py_CODEUNIT *)bc, args); \
} \
} while (0)

#else
Expand Down Expand Up @@ -599,7 +599,8 @@ _Py_CODEUNIT
_Py_GetBaseCodeUnit(PyCodeObject *code, int i)
{
_Py_CODEUNIT *src_instr = _PyCode_CODE(code) + i;
_Py_CODEUNIT inst = {.cache = FT_ATOMIC_LOAD_UINT16_RELAXED(*(uint16_t *)src_instr)};
_Py_CODEUNIT inst = {
.cache = FT_ATOMIC_LOAD_UINT16_RELAXED(*(uint16_t *)src_instr)};
int opcode = inst.op.code;
if (opcode < MIN_INSTRUMENTED_OPCODE) {
inst.op.code = _PyOpcode_Deopt[opcode];
Expand Down Expand Up @@ -635,7 +636,8 @@ _Py_GetBaseCodeUnit(PyCodeObject *code, int i)
}

static void
de_instrument(_Py_CODEUNIT *bytecode, _PyCoMonitoringData *monitoring, int i, int event)
de_instrument(_Py_CODEUNIT *bytecode, _PyCoMonitoringData *monitoring, int i,
int event)
{
assert(event != PY_MONITORING_EVENT_INSTRUCTION);
assert(event != PY_MONITORING_EVENT_LINE);
Expand Down Expand Up @@ -665,7 +667,8 @@ de_instrument(_Py_CODEUNIT *bytecode, _PyCoMonitoringData *monitoring, int i, in
}

static void
de_instrument_line(_Py_CODEUNIT *bytecode, _PyCoMonitoringData *monitoring, int i)
de_instrument_line(_Py_CODEUNIT *bytecode, _PyCoMonitoringData *monitoring,
int i)
{
_Py_CODEUNIT *instr = &bytecode[i];
int opcode = instr->op.code;
Expand Down Expand Up @@ -713,7 +716,6 @@ de_instrument_per_instruction(_Py_CODEUNIT *bytecode,
assert(instr->op.code != INSTRUMENTED_INSTRUCTION);
}


static void
instrument(_Py_CODEUNIT *bytecode, _PyCoMonitoringData *monitoring, int i)
{
Expand All @@ -738,8 +740,9 @@ instrument(_Py_CODEUNIT *bytecode, _PyCoMonitoringData *monitoring, int i)
assert(instrumented);
FT_ATOMIC_STORE_UINT8_RELAXED(*opcode_ptr, instrumented);
if (_PyOpcode_Caches[deopt]) {
FT_ATOMIC_STORE_UINT16_RELAXED(instr[1].counter.as_counter,
adaptive_counter_warmup().as_counter);
FT_ATOMIC_STORE_UINT16_RELAXED(
instr[1].counter.as_counter,
adaptive_counter_warmup().as_counter);
}
}
}
Expand Down
Loading