From 3347161a3549f83816679d07df9677adc26cb616 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Sun, 8 Mar 2026 21:13:33 +0900 Subject: [PATCH 1/6] vm: align CALL/CALL_KW specialization core guards with CPython --- crates/derive-impl/src/pyclass.rs | 2 +- crates/vm/src/builtins/function.rs | 58 ++-- crates/vm/src/builtins/type.rs | 40 ++- crates/vm/src/frame.rs | 461 +++++++++++++++++------------ crates/vm/src/vm/mod.rs | 29 ++ crates/vm/src/vm/thread.rs | 1 + 6 files changed, 379 insertions(+), 212 deletions(-) diff --git a/crates/derive-impl/src/pyclass.rs b/crates/derive-impl/src/pyclass.rs index a65320cdb52..1fec51ddd42 100644 --- a/crates/derive-impl/src/pyclass.rs +++ b/crates/derive-impl/src/pyclass.rs @@ -1021,7 +1021,7 @@ where .iter() .any(|arg| matches!(arg, syn::FnArg::Receiver(_))); let drop_first_typed = match self.inner.attr_name { - AttrName::Method | AttrName::ClassMethod if !has_receiver => 1, + AttrName::Method | AttrName::ClassMethod if !has_receiver && !raw => 1, _ => 0, }; let call_flags = infer_native_call_flags(func.sig(), drop_first_typed); diff --git a/crates/vm/src/builtins/function.rs b/crates/vm/src/builtins/function.rs index 1309e2bd62f..91cf62a0ea6 100644 --- a/crates/vm/src/builtins/function.rs +++ b/crates/vm/src/builtins/function.rs @@ -529,6 +529,10 @@ impl PyFunction { } impl Py { + pub(crate) fn is_optimized_for_call_specialization(&self) -> bool { + self.code.flags.contains(bytecode::CodeFlags::OPTIMIZED) + } + pub fn invoke_with_locals( &self, func_args: FuncArgs, @@ -636,43 +640,63 @@ impl Py { new_v } + /// CPython function_kind(SIMPLE_FUNCTION) equivalent for CALL specialization. + /// Returns true if: CO_OPTIMIZED, no VARARGS, no VARKEYWORDS, no kwonly args. + pub(crate) fn is_simple_for_call_specialization(&self) -> bool { + let code: &Py = &self.code; + let flags = code.flags; + flags.contains(bytecode::CodeFlags::OPTIMIZED) + && !flags.intersects(bytecode::CodeFlags::VARARGS | bytecode::CodeFlags::VARKEYWORDS) + && code.kwonlyarg_count == 0 + } + /// Check if this function is eligible for exact-args call specialization. - /// Returns true if: no VARARGS, no VARKEYWORDS, no kwonly args, not generator/coroutine, + /// Returns true if: CO_OPTIMIZED, no VARARGS, no VARKEYWORDS, no kwonly args, /// and effective_nargs matches co_argcount. pub(crate) fn can_specialize_call(&self, effective_nargs: u32) -> bool { let code: &Py = &self.code; let flags = code.flags; - flags.contains(bytecode::CodeFlags::NEWLOCALS) - && !flags.intersects( - bytecode::CodeFlags::VARARGS - | bytecode::CodeFlags::VARKEYWORDS - | bytecode::CodeFlags::GENERATOR - | bytecode::CodeFlags::COROUTINE, - ) + flags.contains(bytecode::CodeFlags::OPTIMIZED) + && !flags.intersects(bytecode::CodeFlags::VARARGS | bytecode::CodeFlags::VARKEYWORDS) && code.kwonlyarg_count == 0 && code.arg_count == effective_nargs } /// Fast path for calling a simple function with exact positional args. /// Skips FuncArgs allocation, prepend_arg, and fill_locals_from_args. - /// Only valid when: no VARARGS, no VARKEYWORDS, no kwonlyargs, not generator/coroutine, + /// Only valid when: CO_OPTIMIZED, no VARARGS, no VARKEYWORDS, no kwonlyargs, /// and nargs == co_argcount. pub fn invoke_exact_args(&self, mut args: Vec, vm: &VirtualMachine) -> PyResult { let code: PyRef = (*self.code).to_owned(); debug_assert_eq!(args.len(), code.arg_count as usize); - debug_assert!(code.flags.contains(bytecode::CodeFlags::NEWLOCALS)); - debug_assert!(!code.flags.intersects( - bytecode::CodeFlags::VARARGS - | bytecode::CodeFlags::VARKEYWORDS - | bytecode::CodeFlags::GENERATOR - | bytecode::CodeFlags::COROUTINE - )); + debug_assert!(code.flags.contains(bytecode::CodeFlags::OPTIMIZED)); + debug_assert!( + !code + .flags + .intersects(bytecode::CodeFlags::VARARGS | bytecode::CodeFlags::VARKEYWORDS) + ); debug_assert_eq!(code.kwonlyarg_count, 0); + // Generator/coroutine code objects are SIMPLE_FUNCTION in CPython's + // call specialization classification, but their call path must still + // go through invoke() to produce generator/coroutine objects. + if code + .flags + .intersects(bytecode::CodeFlags::GENERATOR | bytecode::CodeFlags::COROUTINE) + { + return self.invoke(FuncArgs::from(args), vm); + } + + let locals = if code.flags.contains(bytecode::CodeFlags::NEWLOCALS) { + None + } else { + Some(ArgMapping::from_dict_exact(self.globals.clone())) + }; + let frame = Frame::new( code.clone(), - Scope::new(None, self.globals.clone()), + Scope::new(locals, self.globals.clone()), self.builtins.clone(), self.closure.as_ref().map_or(&[], |c| c.as_slice()), Some(self.to_owned().into()), diff --git a/crates/vm/src/builtins/type.rs b/crates/vm/src/builtins/type.rs index 65bf1759657..926dc26843e 100644 --- a/crates/vm/src/builtins/type.rs +++ b/crates/vm/src/builtins/type.rs @@ -11,7 +11,7 @@ use crate::{ MemberGetter, MemberKind, MemberSetter, PyDescriptorOwned, PyMemberDef, PyMemberDescriptor, }, - function::PyCellRef, + function::{PyCellRef, PyFunction}, tuple::{IntoPyTuple, PyTuple}, }, class::{PyClassImpl, StaticType}, @@ -269,6 +269,7 @@ pub struct HeapTypeExt { pub qualname: PyRwLock, pub slots: Option>>, pub type_data: PyRwLock>, + pub specialization_init: PyRwLock>>, } pub struct PointerSlot(NonNull); @@ -396,6 +397,9 @@ impl PyType { /// Invalidate this type's version tag and cascade to all subclasses. pub fn modified(&self) { + if let Some(ext) = self.heaptype_ext.as_ref() { + *ext.specialization_init.write() = None; + } // If already invalidated, all subclasses must also be invalidated // (guaranteed by the MRO invariant in assign_version_tag). let old_version = self.tp_version_tag.load(Ordering::Acquire); @@ -450,6 +454,7 @@ impl PyType { qualname: PyRwLock::new(name), slots: None, type_data: PyRwLock::new(None), + specialization_init: PyRwLock::new(None), }; let base = bases[0].clone(); @@ -780,6 +785,38 @@ impl PyType { self.find_name_in_mro(attr_name) } + /// Cache __init__ for CALL_ALLOC_AND_ENTER_INIT specialization. + /// The cache is valid only when guarded by the type version check. + pub(crate) fn cache_init_for_specialization( + &self, + init: PyRef, + tp_version: u32, + ) -> bool { + let Some(ext) = self.heaptype_ext.as_ref() else { + return false; + }; + if tp_version == 0 || self.tp_version_tag.load(Ordering::Acquire) != tp_version { + return false; + } + *ext.specialization_init.write() = Some(init); + true + } + + /// Read cached __init__ for CALL_ALLOC_AND_ENTER_INIT specialization. + pub(crate) fn get_cached_init_for_specialization( + &self, + tp_version: u32, + ) -> Option> { + let ext = self.heaptype_ext.as_ref()?; + if tp_version == 0 || self.tp_version_tag.load(Ordering::Acquire) != tp_version { + return None; + } + ext.specialization_init + .read() + .as_ref() + .map(|init| init.to_owned()) + } + pub fn get_direct_attr(&self, attr_name: &'static PyStrInterned) -> Option { self.attributes.read().get(attr_name).cloned() } @@ -1882,6 +1919,7 @@ impl Constructor for PyType { qualname: PyRwLock::new(qualname), slots: heaptype_slots.clone(), type_data: PyRwLock::new(None), + specialization_init: PyRwLock::new(None), }; (slots, heaptype_ext) }; diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index 81a4c5d5683..bd894b2b0df 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -4023,12 +4023,27 @@ impl ExecutingFrame<'_> { let cache_base = instr_idx + 1; let cached_version = self.code.instructions.read_cache_u32(cache_base + 1); let nargs: u32 = arg.into(); + if self.specialization_eval_frame_active(vm) { + return self.execute_call_vectorcall(nargs, vm); + } + if vm.reached_c_stack_limit() || self.specialization_call_recursion_guard(vm) { + return self.execute_call_vectorcall(nargs, vm); + } // Stack: [callable, self_or_null, arg1, ..., argN] + let stack_len = self.localsplus.stack_len(); + let self_or_null_is_some = self + .localsplus + .stack_index(stack_len - nargs as usize - 1) + .is_some(); let callable = self.nth_value(nargs + 1); if let Some(func) = callable.downcast_ref_if_exact::(vm) && func.func_version() == cached_version && cached_version != 0 { + let effective_nargs = nargs + u32::from(self_or_null_is_some); + if !func.can_specialize_call(effective_nargs) { + return self.execute_call_vectorcall(nargs, vm); + } let pos_args: Vec = self.pop_multiple(nargs as usize).collect(); let self_or_null = self.pop_value_opt(); let callable = self.pop_value(); @@ -4045,11 +4060,7 @@ impl ExecutingFrame<'_> { self.push_value(result); Ok(None) } else { - self.deoptimize(Instruction::Call { - argc: Arg::marker(), - }); - let args = self.collect_positional_args(nargs); - self.execute_call(args, vm) + self.execute_call_vectorcall(nargs, vm) } } Instruction::CallBoundMethodExactArgs => { @@ -4057,6 +4068,12 @@ impl ExecutingFrame<'_> { let cache_base = instr_idx + 1; let cached_version = self.code.instructions.read_cache_u32(cache_base + 1); let nargs: u32 = arg.into(); + if self.specialization_eval_frame_active(vm) { + return self.execute_call_vectorcall(nargs, vm); + } + if vm.reached_c_stack_limit() || self.specialization_call_recursion_guard(vm) { + return self.execute_call_vectorcall(nargs, vm); + } // Stack: [callable, self_or_null(NULL), arg1, ..., argN] let stack_len = self.localsplus.stack_len(); let self_or_null_is_some = self @@ -4073,6 +4090,9 @@ impl ExecutingFrame<'_> { && func.func_version() == cached_version && cached_version != 0 { + if !func.can_specialize_call(nargs + 1) { + return self.execute_call_vectorcall(nargs, vm); + } let pos_args: Vec = self.pop_multiple(nargs as usize).collect(); self.pop_value_opt(); // null (self_or_null) @@ -4085,24 +4105,22 @@ impl ExecutingFrame<'_> { return Ok(None); } } - self.deoptimize(Instruction::Call { - argc: Arg::marker(), - }); - let args = self.collect_positional_args(nargs); - self.execute_call(args, vm) + self.execute_call_vectorcall(nargs, vm) } Instruction::CallLen => { - let instr_idx = self.lasti() as usize - 1; - let cache_base = instr_idx + 1; - let cached_ptr = self.code.instructions.read_cache_ptr(cache_base + 1); let nargs: u32 = arg.into(); if nargs == 1 { // Stack: [callable, null, arg] let obj = self.pop_value(); // arg let null = self.pop_value_opt(); let callable = self.pop_value(); - let callable_ptr = &*callable as *const PyObject as usize; - if null.is_none() && cached_ptr == callable_ptr { + if null.is_none() + && vm + .callable_cache + .len + .as_ref() + .is_some_and(|len_callable| callable.is(len_callable)) + { let len = obj.length(vm)?; self.push_value(vm.ctx.new_int(len).into()); return Ok(None); @@ -4112,16 +4130,9 @@ impl ExecutingFrame<'_> { self.push_value_opt(null); self.push_value(obj); } - self.deoptimize(Instruction::Call { - argc: Arg::marker(), - }); - let args = self.collect_positional_args(nargs); - self.execute_call(args, vm) + self.execute_call_vectorcall(nargs, vm) } Instruction::CallIsinstance => { - let instr_idx = self.lasti() as usize - 1; - let cache_base = instr_idx + 1; - let cached_ptr = self.code.instructions.read_cache_ptr(cache_base + 1); let nargs: u32 = arg.into(); let stack_len = self.localsplus.stack_len(); let self_or_null_is_some = self @@ -4131,8 +4142,12 @@ impl ExecutingFrame<'_> { let effective_nargs = nargs + u32::from(self_or_null_is_some); if effective_nargs == 2 { let callable = self.nth_value(nargs + 1); - let callable_ptr = callable as *const PyObject as usize; - if cached_ptr == callable_ptr { + if vm + .callable_cache + .isinstance + .as_ref() + .is_some_and(|isinstance_callable| callable.is(isinstance_callable)) + { let nargs_usize = nargs as usize; let pos_args: Vec = self.pop_multiple(nargs_usize).collect(); let self_or_null = self.pop_value_opt(); @@ -4147,11 +4162,7 @@ impl ExecutingFrame<'_> { return Ok(None); } } - self.deoptimize(Instruction::Call { - argc: Arg::marker(), - }); - let args = self.collect_positional_args(nargs); - self.execute_call(args, vm) + self.execute_call_vectorcall(nargs, vm) } Instruction::CallType1 => { let nargs: u32 = arg.into(); @@ -4170,11 +4181,7 @@ impl ExecutingFrame<'_> { self.push_value_opt(null); self.push_value(obj); } - self.deoptimize(Instruction::Call { - argc: Arg::marker(), - }); - let args = self.collect_positional_args(nargs); - self.execute_call(args, vm) + self.execute_call_vectorcall(nargs, vm) } Instruction::CallStr1 => { let nargs: u32 = arg.into(); @@ -4191,11 +4198,7 @@ impl ExecutingFrame<'_> { self.push_value_opt(null); self.push_value(obj); } - self.deoptimize(Instruction::Call { - argc: Arg::marker(), - }); - let args = self.collect_positional_args(nargs); - self.execute_call(args, vm) + self.execute_call_vectorcall(nargs, vm) } Instruction::CallTuple1 => { let nargs: u32 = arg.into(); @@ -4217,11 +4220,7 @@ impl ExecutingFrame<'_> { self.push_value_opt(null); self.push_value(obj); } - self.deoptimize(Instruction::Call { - argc: Arg::marker(), - }); - let args = self.collect_positional_args(nargs); - self.execute_call(args, vm) + self.execute_call_vectorcall(nargs, vm) } Instruction::CallBuiltinO => { let nargs: u32 = arg.into(); @@ -4240,6 +4239,9 @@ impl ExecutingFrame<'_> { | PyMethodFlags::O | PyMethodFlags::KEYWORDS); if call_conv == PyMethodFlags::O && effective_nargs == 1 { + if vm.reached_c_stack_limit() { + return self.execute_call_vectorcall(nargs, vm); + } let nargs_usize = nargs as usize; let pos_args: Vec = self.pop_multiple(nargs_usize).collect(); let self_or_null = self.pop_value_opt(); @@ -4289,9 +4291,6 @@ impl ExecutingFrame<'_> { return Ok(None); } } - self.deoptimize(Instruction::Call { - argc: Arg::marker(), - }); self.execute_call_vectorcall(nargs, vm) } Instruction::CallPyGeneral => { @@ -4299,6 +4298,12 @@ impl ExecutingFrame<'_> { let cache_base = instr_idx + 1; let cached_version = self.code.instructions.read_cache_u32(cache_base + 1); let nargs: u32 = arg.into(); + if self.specialization_eval_frame_active(vm) { + return self.execute_call_vectorcall(nargs, vm); + } + if self.specialization_call_recursion_guard(vm) { + return self.execute_call_vectorcall(nargs, vm); + } let callable = self.nth_value(nargs + 1); if let Some(func) = callable.downcast_ref_if_exact::(vm) && func.func_version() == cached_version @@ -4321,11 +4326,7 @@ impl ExecutingFrame<'_> { self.push_value(result); Ok(None) } else { - self.deoptimize(Instruction::Call { - argc: Arg::marker(), - }); - let args = self.collect_positional_args(nargs); - self.execute_call(args, vm) + self.execute_call_vectorcall(nargs, vm) } } Instruction::CallBoundMethodGeneral => { @@ -4333,6 +4334,12 @@ impl ExecutingFrame<'_> { let cache_base = instr_idx + 1; let cached_version = self.code.instructions.read_cache_u32(cache_base + 1); let nargs: u32 = arg.into(); + if self.specialization_eval_frame_active(vm) { + return self.execute_call_vectorcall(nargs, vm); + } + if self.specialization_call_recursion_guard(vm) { + return self.execute_call_vectorcall(nargs, vm); + } let stack_len = self.localsplus.stack_len(); let self_or_null_is_some = self .localsplus @@ -4369,22 +4376,25 @@ impl ExecutingFrame<'_> { self.execute_call_vectorcall(nargs, vm) } Instruction::CallListAppend => { - let instr_idx = self.lasti() as usize - 1; - let cache_base = instr_idx + 1; - let cached_ptr = self.code.instructions.read_cache_ptr(cache_base + 1); let nargs: u32 = arg.into(); if nargs == 1 { // Stack: [callable, self_or_null, item] let stack_len = self.localsplus.stack_len(); let self_or_null_is_some = self.localsplus.stack_index(stack_len - 2).is_some(); let callable = self.nth_value(2); - let callable_ptr = callable as *const PyObject as usize; let self_is_list = self .localsplus .stack_index(stack_len - 2) .as_ref() .is_some_and(|obj| obj.downcast_ref::().is_some()); - if cached_ptr == callable_ptr && self_or_null_is_some && self_is_list { + if vm + .callable_cache + .list_append + .as_ref() + .is_some_and(|list_append| callable.is(list_append)) + && self_or_null_is_some + && self_is_list + { let item = self.pop_value(); let self_or_null = self.pop_value_opt(); let callable = self.pop_value(); @@ -4404,25 +4414,21 @@ impl ExecutingFrame<'_> { self.push_value(item); } } - self.deoptimize(Instruction::Call { - argc: Arg::marker(), - }); - let args = self.collect_positional_args(nargs); - self.execute_call(args, vm) + self.execute_call_vectorcall(nargs, vm) } Instruction::CallMethodDescriptorNoargs => { let nargs: u32 = arg.into(); - if nargs == 0 { - // Stack: [callable, self_or_null] — peek to get func ptr - let stack_len = self.localsplus.stack_len(); - let self_or_null_is_some = self.localsplus.stack_index(stack_len - 1).is_some(); - let callable = self.nth_value(1); - let descr = if self_or_null_is_some { - callable.downcast_ref_if_exact::(vm) - } else { - None - }; - if let Some(descr) = descr + let stack_len = self.localsplus.stack_len(); + let self_or_null_is_some = self + .localsplus + .stack_index(stack_len - nargs as usize - 1) + .is_some(); + let total_nargs = nargs + u32::from(self_or_null_is_some); + if total_nargs == 1 { + let callable = self.nth_value(nargs + 1); + let self_index = + stack_len - nargs as usize - 1 + usize::from(!self_or_null_is_some); + if let Some(descr) = callable.downcast_ref_if_exact::(vm) && descr.method.flags.contains(PyMethodFlags::METHOD) && (descr.method.flags & (PyMethodFlags::VARARGS @@ -4433,15 +4439,25 @@ impl ExecutingFrame<'_> { == PyMethodFlags::NOARGS && self .localsplus - .stack_index(stack_len - 1) + .stack_index(self_index) .as_ref() .is_some_and(|self_obj| self_obj.class().is(descr.objclass)) { + if vm.reached_c_stack_limit() { + return self.execute_call_vectorcall(nargs, vm); + } let func = descr.method.func; - let self_val = self.pop_value_opt().unwrap(); + let positional_args: Vec = + self.pop_multiple(nargs as usize).collect(); + let self_or_null = self.pop_value_opt(); self.pop_value(); // callable + let mut all_args = Vec::with_capacity(total_nargs as usize); + if let Some(self_val) = self_or_null { + all_args.push(self_val); + } + all_args.extend(positional_args); let args = FuncArgs { - args: vec![self_val], + args: all_args, kwargs: Default::default(), }; let result = func(vm, args)?; @@ -4453,17 +4469,17 @@ impl ExecutingFrame<'_> { } Instruction::CallMethodDescriptorO => { let nargs: u32 = arg.into(); - if nargs == 1 { - // Stack: [callable, self_or_null, arg1] - let stack_len = self.localsplus.stack_len(); - let self_or_null_is_some = self.localsplus.stack_index(stack_len - 2).is_some(); - let callable = self.nth_value(2); - let descr = if self_or_null_is_some { - callable.downcast_ref_if_exact::(vm) - } else { - None - }; - if let Some(descr) = descr + let stack_len = self.localsplus.stack_len(); + let self_or_null_is_some = self + .localsplus + .stack_index(stack_len - nargs as usize - 1) + .is_some(); + let total_nargs = nargs + u32::from(self_or_null_is_some); + if total_nargs == 2 { + let callable = self.nth_value(nargs + 1); + let self_index = + stack_len - nargs as usize - 1 + usize::from(!self_or_null_is_some); + if let Some(descr) = callable.downcast_ref_if_exact::(vm) && descr.method.flags.contains(PyMethodFlags::METHOD) && (descr.method.flags & (PyMethodFlags::VARARGS @@ -4474,16 +4490,25 @@ impl ExecutingFrame<'_> { == PyMethodFlags::O && self .localsplus - .stack_index(stack_len - 2) + .stack_index(self_index) .as_ref() .is_some_and(|self_obj| self_obj.class().is(descr.objclass)) { + if vm.reached_c_stack_limit() { + return self.execute_call_vectorcall(nargs, vm); + } let func = descr.method.func; - let obj = self.pop_value(); - let self_val = self.pop_value_opt().unwrap(); + let positional_args: Vec = + self.pop_multiple(nargs as usize).collect(); + let self_or_null = self.pop_value_opt(); self.pop_value(); // callable + let mut all_args = Vec::with_capacity(total_nargs as usize); + if let Some(self_val) = self_or_null { + all_args.push(self_val); + } + all_args.extend(positional_args); let args = FuncArgs { - args: vec![self_val, obj], + args: all_args, kwargs: Default::default(), }; let result = func(vm, args)?; @@ -4495,18 +4520,17 @@ impl ExecutingFrame<'_> { } Instruction::CallMethodDescriptorFast => { let nargs: u32 = arg.into(); - let callable = self.nth_value(nargs + 1); let stack_len = self.localsplus.stack_len(); let self_or_null_is_some = self .localsplus .stack_index(stack_len - nargs as usize - 1) .is_some(); - let descr = if self_or_null_is_some { - callable.downcast_ref_if_exact::(vm) - } else { - None - }; - if let Some(descr) = descr + let total_nargs = nargs + u32::from(self_or_null_is_some); + let callable = self.nth_value(nargs + 1); + let self_index = + stack_len - nargs as usize - 1 + usize::from(!self_or_null_is_some); + if total_nargs > 0 + && let Some(descr) = callable.downcast_ref_if_exact::(vm) && descr.method.flags.contains(PyMethodFlags::METHOD) && (descr.method.flags & (PyMethodFlags::VARARGS @@ -4517,17 +4541,19 @@ impl ExecutingFrame<'_> { == PyMethodFlags::FASTCALL && self .localsplus - .stack_index(stack_len - nargs as usize - 1) + .stack_index(self_index) .as_ref() .is_some_and(|self_obj| self_obj.class().is(descr.objclass)) { let func = descr.method.func; let positional_args: Vec = self.pop_multiple(nargs as usize).collect(); - let self_val = self.pop_value_opt().unwrap(); + let self_or_null = self.pop_value_opt(); self.pop_value(); // callable - let mut all_args = Vec::with_capacity(nargs as usize + 1); - all_args.push(self_val); + let mut all_args = Vec::with_capacity(total_nargs as usize); + if let Some(self_val) = self_or_null { + all_args.push(self_val); + } all_args.extend(positional_args); let args = FuncArgs { args: all_args, @@ -4564,9 +4590,6 @@ impl ExecutingFrame<'_> { self.push_value(result); return Ok(None); } - self.deoptimize(Instruction::Call { - argc: Arg::marker(), - }); self.execute_call_vectorcall(nargs, vm) } Instruction::CallAllocAndEnterInit => { @@ -4580,71 +4603,75 @@ impl ExecutingFrame<'_> { .localsplus .stack_index(stack_len - nargs as usize - 1) .is_some(); - if !self_or_null_is_some + if !self.specialization_eval_frame_active(vm) + && !self_or_null_is_some && cached_version != 0 && let Some(cls) = callable.downcast_ref::() && cls.tp_version_tag.load(Acquire) == cached_version + && let Some(init_func) = cls.get_cached_init_for_specialization(cached_version) { - // Look up __init__ (guarded by type_version) - if let Some(init) = cls.get_attr(identifier!(vm, __init__)) - && let Some(init_func) = init.downcast_ref_if_exact::(vm) - && init_func.can_specialize_call(nargs + 1) + if vm.reached_c_stack_limit() { + return self.execute_call_vectorcall(nargs, vm); + } + // Allocate object directly (tp_new == object.__new__) + let dict = if cls + .slots + .flags + .has_feature(crate::types::PyTypeFlags::HAS_DICT) { - // Allocate object directly (tp_new == object.__new__) - let dict = if cls - .slots - .flags - .has_feature(crate::types::PyTypeFlags::HAS_DICT) - { - Some(vm.ctx.new_dict()) - } else { - None - }; - let cls_ref = cls.to_owned(); - let new_obj: PyObjectRef = - PyRef::new_ref(PyBaseObject, cls_ref, dict).into(); - - // Build args: [new_obj, arg1, ..., argN] - let pos_args: Vec = - self.pop_multiple(nargs as usize).collect(); - let _null = self.pop_value_opt(); // self_or_null (None) - let _callable = self.pop_value(); // callable (type) + Some(vm.ctx.new_dict()) + } else { + None + }; + let cls_ref = cls.to_owned(); + let new_obj: PyObjectRef = PyRef::new_ref(PyBaseObject, cls_ref, dict).into(); - let mut all_args = Vec::with_capacity(pos_args.len() + 1); - all_args.push(new_obj.clone()); - all_args.extend(pos_args); + // Build args: [new_obj, arg1, ..., argN] + let pos_args: Vec = self.pop_multiple(nargs as usize).collect(); + let _null = self.pop_value_opt(); // self_or_null (None) + let _callable = self.pop_value(); // callable (type) - let init_result = init_func.invoke_exact_args(all_args, vm)?; + let mut all_args = Vec::with_capacity(pos_args.len() + 1); + all_args.push(new_obj.clone()); + all_args.extend(pos_args); - // EXIT_INIT_CHECK: __init__ must return None - if !vm.is_none(&init_result) { - return Err(vm.new_type_error("__init__() should return None")); - } + let init_result = if init_func.can_specialize_call(all_args.len() as u32) { + init_func.invoke_exact_args(all_args, vm)? + } else { + let args = FuncArgs { + args: all_args, + kwargs: Default::default(), + }; + init_func.invoke(args, vm)? + }; - self.push_value(new_obj); - return Ok(None); + // EXIT_INIT_CHECK: __init__ must return None + if !vm.is_none(&init_result) { + return Err(vm.new_type_error(format!( + "__init__() should return None, not '{}'", + init_result.class().name() + ))); } + + self.push_value(new_obj); + return Ok(None); } - self.deoptimize(Instruction::Call { - argc: Arg::marker(), - }); self.execute_call_vectorcall(nargs, vm) } Instruction::CallMethodDescriptorFastWithKeywords => { // Native function interface is uniform regardless of keyword support let nargs: u32 = arg.into(); - let callable = self.nth_value(nargs + 1); let stack_len = self.localsplus.stack_len(); let self_or_null_is_some = self .localsplus .stack_index(stack_len - nargs as usize - 1) .is_some(); - let descr = if self_or_null_is_some { - callable.downcast_ref_if_exact::(vm) - } else { - None - }; - if let Some(descr) = descr + let total_nargs = nargs + u32::from(self_or_null_is_some); + let callable = self.nth_value(nargs + 1); + let self_index = + stack_len - nargs as usize - 1 + usize::from(!self_or_null_is_some); + if total_nargs > 0 + && let Some(descr) = callable.downcast_ref_if_exact::(vm) && descr.method.flags.contains(PyMethodFlags::METHOD) && (descr.method.flags & (PyMethodFlags::VARARGS @@ -4655,17 +4682,19 @@ impl ExecutingFrame<'_> { == (PyMethodFlags::FASTCALL | PyMethodFlags::KEYWORDS) && self .localsplus - .stack_index(stack_len - nargs as usize - 1) + .stack_index(self_index) .as_ref() .is_some_and(|self_obj| self_obj.class().is(descr.objclass)) { let func = descr.method.func; let positional_args: Vec = self.pop_multiple(nargs as usize).collect(); - let self_val = self.pop_value_opt().unwrap(); + let self_or_null = self.pop_value_opt(); self.pop_value(); // callable - let mut all_args = Vec::with_capacity(nargs as usize + 1); - all_args.push(self_val); + let mut all_args = Vec::with_capacity(total_nargs as usize); + if let Some(self_val) = self_or_null { + all_args.push(self_val); + } all_args.extend(positional_args); let args = FuncArgs { args: all_args, @@ -4710,9 +4739,6 @@ impl ExecutingFrame<'_> { return Ok(None); } } - self.deoptimize(Instruction::Call { - argc: Arg::marker(), - }); self.execute_call_vectorcall(nargs, vm) } Instruction::CallNonPyGeneral => { @@ -4754,6 +4780,12 @@ impl ExecutingFrame<'_> { let cache_base = instr_idx + 1; let cached_version = self.code.instructions.read_cache_u32(cache_base + 1); let nargs: u32 = arg.into(); + if self.specialization_eval_frame_active(vm) { + return self.execute_call_kw_vectorcall(nargs, vm); + } + if self.specialization_call_recursion_guard(vm) { + return self.execute_call_kw_vectorcall(nargs, vm); + } // Stack: [callable, self_or_null, arg1, ..., argN, kwarg_names] let callable = self.nth_value(nargs + 2); if let Some(func) = callable.downcast_ref_if_exact::(vm) @@ -4789,17 +4821,16 @@ impl ExecutingFrame<'_> { self.push_value(result); return Ok(None); } - self.deoptimize(Instruction::CallKw { - argc: Arg::marker(), - }); - let args = self.collect_keyword_args(nargs); - self.execute_call(args, vm) + self.execute_call_kw_vectorcall(nargs, vm) } Instruction::CallKwBoundMethod => { let instr_idx = self.lasti() as usize - 1; let cache_base = instr_idx + 1; let cached_version = self.code.instructions.read_cache_u32(cache_base + 1); let nargs: u32 = arg.into(); + if self.specialization_eval_frame_active(vm) { + return self.execute_call_kw_vectorcall(nargs, vm); + } // Stack: [callable, self_or_null, arg1, ..., argN, kwarg_names] let stack_len = self.localsplus.stack_len(); let self_or_null_is_some = self @@ -4841,11 +4872,7 @@ impl ExecutingFrame<'_> { return Ok(None); } } - self.deoptimize(Instruction::CallKw { - argc: Arg::marker(), - }); - let args = self.collect_keyword_args(nargs); - self.execute_call(args, vm) + self.execute_call_kw_vectorcall(nargs, vm) } Instruction::CallKwNonPy => { let nargs: u32 = arg.into(); @@ -7729,6 +7756,17 @@ impl ExecutingFrame<'_> { } return; } + if !func.is_optimized_for_call_specialization() { + unsafe { + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); + } + return; + } let version = func.get_version_for_current_state(); if version == 0 { unsafe { @@ -7781,6 +7819,17 @@ impl ExecutingFrame<'_> { } return; } + if !func.is_optimized_for_call_specialization() { + unsafe { + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); + } + return; + } let version = func.get_version_for_current_state(); if version == 0 { unsafe { @@ -7821,8 +7870,7 @@ impl ExecutingFrame<'_> { } // Try to specialize method descriptor calls - if self_or_null_is_some - && let Some(descr) = callable.downcast_ref_if_exact::(vm) + if let Some(descr) = callable.downcast_ref_if_exact::(vm) && descr.method.flags.contains(PyMethodFlags::METHOD) { let call_cache_entries = Instruction::CallListAppend.cache_entries(); @@ -7840,9 +7888,10 @@ impl ExecutingFrame<'_> { | PyMethodFlags::NOARGS | PyMethodFlags::O | PyMethodFlags::KEYWORDS); + let total_nargs = nargs + u32::from(self_or_null_is_some); let new_op = if call_conv == PyMethodFlags::NOARGS { - if nargs != 0 { + if total_nargs != 1 { unsafe { self.code.instructions.write_adaptive_counter( cache_base, @@ -7855,7 +7904,7 @@ impl ExecutingFrame<'_> { } Instruction::CallMethodDescriptorNoargs } else if call_conv == PyMethodFlags::O { - if nargs != 1 { + if total_nargs != 2 { unsafe { self.code.instructions.write_adaptive_counter( cache_base, @@ -7866,16 +7915,15 @@ impl ExecutingFrame<'_> { } return; } - if descr.method.name == "append" - && descr.objclass.is(vm.ctx.types.list_type) + if self_or_null_is_some + && nargs == 1 && next_is_pop_top + && vm + .callable_cache + .list_append + .as_ref() + .is_some_and(|list_append| callable.is(list_append)) { - let callable_ptr = callable as *const PyObject as usize; - unsafe { - self.code - .instructions - .write_cache_ptr(cache_base + 1, callable_ptr); - } Instruction::CallListAppend } else { Instruction::CallMethodDescriptorO @@ -7894,7 +7942,6 @@ impl ExecutingFrame<'_> { // Try to specialize builtin calls if let Some(native) = callable.downcast_ref_if_exact::(vm) { let effective_nargs = nargs + u32::from(self_or_null_is_some); - let callable_ptr = callable as *const PyObject as usize; let call_conv = native.value.flags & (PyMethodFlags::VARARGS | PyMethodFlags::FASTCALL @@ -7914,9 +7961,12 @@ impl ExecutingFrame<'_> { return; } if native.zelf.is_none() - && native.value.name == "len" - && native.module.is_some_and(|m| m.as_str() == "builtins") && nargs == 1 + && vm + .callable_cache + .len + .as_ref() + .is_some_and(|len_callable| callable.is(len_callable)) { Instruction::CallLen } else { @@ -7924,9 +7974,12 @@ impl ExecutingFrame<'_> { } } else if call_conv == PyMethodFlags::FASTCALL { if native.zelf.is_none() - && native.value.name == "isinstance" - && native.module.is_some_and(|m| m.as_str() == "builtins") && effective_nargs == 2 + && vm + .callable_cache + .isinstance + .as_ref() + .is_some_and(|isinstance_callable| callable.is(isinstance_callable)) { Instruction::CallIsinstance } else { @@ -7937,13 +7990,6 @@ impl ExecutingFrame<'_> { } else { Instruction::CallNonPyGeneral }; - if matches!(new_op, Instruction::CallLen | Instruction::CallIsinstance) { - unsafe { - self.code - .instructions - .write_cache_ptr(cache_base + 1, callable_ptr); - } - } self.specialize_at(instr_idx, cache_base, new_op); return; } @@ -7989,10 +8035,12 @@ impl ExecutingFrame<'_> { && cls_new_fn as usize == obj_new_fn as usize && let Some(init) = cls.get_attr(identifier!(vm, __init__)) && let Some(init_func) = init.downcast_ref_if_exact::(vm) - && init_func.can_specialize_call(nargs + 1) + && init_func.is_simple_for_call_specialization() { let version = cls.tp_version_tag.load(Acquire); - if version != 0 { + if version != 0 + && cls.cache_init_for_specialization(init_func.to_owned(), version) + { unsafe { self.code .instructions @@ -8049,6 +8097,17 @@ impl ExecutingFrame<'_> { } return; } + if !func.is_optimized_for_call_specialization() { + unsafe { + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); + } + return; + } let version = func.get_version_for_current_state(); if version == 0 { unsafe { @@ -8089,6 +8148,17 @@ impl ExecutingFrame<'_> { } return; } + if !func.is_optimized_for_call_specialization() { + unsafe { + self.code.instructions.write_adaptive_counter( + cache_base, + bytecode::adaptive_counter_backoff( + self.code.instructions.read_adaptive_counter(cache_base), + ), + ); + } + return; + } let version = func.get_version_for_current_state(); if version == 0 { unsafe { @@ -8326,6 +8396,11 @@ impl ExecutingFrame<'_> { vm.use_tracing.get() } + #[inline] + fn specialization_call_recursion_guard(&self, vm: &VirtualMachine) -> bool { + vm.current_recursion_depth().saturating_add(1) >= vm.recursion_limit.get() + } + #[inline] fn for_iter_has_end_for_shape(&self, instr_idx: usize, jump_delta: u32) -> bool { let target_idx = instr_idx diff --git a/crates/vm/src/vm/mod.rs b/crates/vm/src/vm/mod.rs index 6040b0b6f39..0de1c1e2547 100644 --- a/crates/vm/src/vm/mod.rs +++ b/crates/vm/src/vm/mod.rs @@ -103,6 +103,7 @@ pub struct VirtualMachine { pub asyncio_running_loop: RefCell>, /// Current running asyncio task for this thread pub asyncio_running_task: RefCell>, + pub(crate) callable_cache: CallableCache, } /// Non-owning frame pointer for the frames stack. @@ -570,6 +571,13 @@ pub(super) fn stw_trace(msg: core::fmt::Arguments<'_>) { } } +#[derive(Clone, Debug, Default)] +pub(crate) struct CallableCache { + pub len: Option, + pub isinstance: Option, + pub list_append: Option, +} + pub struct PyGlobalState { pub config: PyConfig, pub module_defs: BTreeMap<&'static str, &'static builtins::PyModuleDef>, @@ -623,6 +631,19 @@ pub fn process_hash_secret_seed() -> u32 { } impl VirtualMachine { + fn init_callable_cache(&mut self) -> PyResult<()> { + self.callable_cache.len = Some(self.builtins.get_attr("len", self)?); + self.callable_cache.isinstance = Some(self.builtins.get_attr("isinstance", self)?); + let list_append = self + .ctx + .types + .list_type + .get_attr(self.ctx.intern_str("append")) + .ok_or_else(|| self.new_runtime_error("failed to cache list.append".to_owned()))?; + self.callable_cache.list_append = Some(list_append); + Ok(()) + } + /// Bump-allocate `size` bytes from the thread data stack. /// /// # Safety @@ -715,6 +736,7 @@ impl VirtualMachine { async_gen_finalizer: RefCell::new(None), asyncio_running_loop: RefCell::new(None), asyncio_running_task: RefCell::new(None), + callable_cache: CallableCache::default(), }; if vm.state.hash_secret.hash_str("") @@ -849,6 +871,8 @@ impl VirtualMachine { stdlib::thread::init_main_thread_ident(self); stdlib::builtins::init_module(self, &self.builtins); + let callable_cache_init = self.init_callable_cache(); + self.expect_pyresult(callable_cache_init, "failed to initialize callable cache"); stdlib::sys::init_module(self, &self.sys_module, &self.builtins); self.expect_pyresult( stdlib::sys::set_bootstrap_stderr(self), @@ -1498,6 +1522,11 @@ impl VirtualMachine { false } + #[inline(always)] + pub(crate) fn reached_c_stack_limit(&self) -> bool { + self.check_c_stack_overflow() + } + /// Used to run the body of a (possibly) recursive function. It will raise a /// RecursionError if recursive functions are nested far too many times, /// preventing a stack overflow. diff --git a/crates/vm/src/vm/thread.rs b/crates/vm/src/vm/thread.rs index 80529699738..13addacd516 100644 --- a/crates/vm/src/vm/thread.rs +++ b/crates/vm/src/vm/thread.rs @@ -634,6 +634,7 @@ impl VirtualMachine { async_gen_finalizer: RefCell::new(None), asyncio_running_loop: RefCell::new(None), asyncio_running_task: RefCell::new(None), + callable_cache: self.callable_cache.clone(), }; ThreadedVirtualMachine { vm } } From be1fe7107be97663b7070c3b07081354cc7ddda1 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Sun, 8 Mar 2026 21:13:39 +0900 Subject: [PATCH 2/6] vm: keep specialization hot on misses and add heaptype getitem parity --- crates/vm/src/builtins/function.rs | 27 ++ crates/vm/src/builtins/object.rs | 33 +- crates/vm/src/builtins/type.rs | 87 +++++- crates/vm/src/frame.rs | 477 +++++++++++------------------ crates/vm/src/types/slot.rs | 2 + crates/vm/src/vm/mod.rs | 13 +- 6 files changed, 314 insertions(+), 325 deletions(-) diff --git a/crates/vm/src/builtins/function.rs b/crates/vm/src/builtins/function.rs index 91cf62a0ea6..d18fdd19701 100644 --- a/crates/vm/src/builtins/function.rs +++ b/crates/vm/src/builtins/function.rs @@ -662,6 +662,33 @@ impl Py { && code.arg_count == effective_nargs } + /// Runtime guard for CALL_*_EXACT_ARGS specialization: check only argcount. + /// Other invariants are guaranteed by function versioning and specialization-time checks. + #[inline] + pub(crate) fn has_exact_argcount(&self, effective_nargs: u32) -> bool { + self.code.arg_count == effective_nargs + } + + /// Bytes required for this function's frame on RustPython's thread datastack. + /// Returns `None` for generator/coroutine code paths that do not push a + /// regular datastack-backed frame in the fast call path. + pub(crate) fn datastack_frame_size_bytes(&self) -> Option { + let code: &Py = &self.code; + if code + .flags + .intersects(bytecode::CodeFlags::GENERATOR | bytecode::CodeFlags::COROUTINE) + { + return None; + } + let nlocalsplus = code + .varnames + .len() + .checked_add(code.cellvars.len())? + .checked_add(code.freevars.len())?; + let capacity = nlocalsplus.checked_add(code.max_stackdepth as usize)?; + capacity.checked_mul(core::mem::size_of::()) + } + /// Fast path for calling a simple function with exact positional args. /// Skips FuncArgs allocation, prepend_arg, and fill_locals_from_args. /// Only valid when: CO_OPTIMIZED, no VARARGS, no VARKEYWORDS, no kwonlyargs, diff --git a/crates/vm/src/builtins/object.rs b/crates/vm/src/builtins/object.rs index b6d355c5933..002b05d38f1 100644 --- a/crates/vm/src/builtins/object.rs +++ b/crates/vm/src/builtins/object.rs @@ -64,19 +64,6 @@ impl Constructor for PyBaseObject { } } - // more or less __new__ operator - // Only create dict if the class has HAS_DICT flag (i.e., __slots__ was not defined - // or __dict__ is in __slots__) - let dict = if cls - .slots - .flags - .has_feature(crate::types::PyTypeFlags::HAS_DICT) - { - Some(vm.ctx.new_dict()) - } else { - None - }; - // Ensure that all abstract methods are implemented before instantiating instance. if let Some(abs_methods) = cls.get_attr(identifier!(vm, __abstractmethods__)) && let Some(unimplemented_abstract_method_count) = abs_methods.length_opt(vm) @@ -109,7 +96,7 @@ impl Constructor for PyBaseObject { } } - Ok(crate::PyRef::new_ref(Self, cls, dict).into()) + generic_alloc(cls, 0, vm) } fn py_new(_cls: &Py, _args: Self::Args, _vm: &VirtualMachine) -> PyResult { @@ -117,6 +104,21 @@ impl Constructor for PyBaseObject { } } +pub(crate) fn generic_alloc(cls: PyTypeRef, _nitems: usize, vm: &VirtualMachine) -> PyResult { + // Only create dict if the class has HAS_DICT flag (i.e., __slots__ was not defined + // or __dict__ is in __slots__) + let dict = if cls + .slots + .flags + .has_feature(crate::types::PyTypeFlags::HAS_DICT) + { + Some(vm.ctx.new_dict()) + } else { + None + }; + Ok(crate::PyRef::new_ref(PyBaseObject, cls, dict).into()) +} + impl Initializer for PyBaseObject { type Args = FuncArgs; @@ -561,8 +563,9 @@ pub fn object_set_dict(obj: PyObjectRef, dict: PyDictRef, vm: &VirtualMachine) - } pub fn init(ctx: &'static Context) { - // Manually set init slot - derive macro doesn't generate extend_slots + // Manually set alloc/init slots - derive macro doesn't generate extend_slots // for trait impl that overrides #[pyslot] method + ctx.types.object_type.slots.alloc.store(Some(generic_alloc)); ctx.types .object_type .slots diff --git a/crates/vm/src/builtins/type.rs b/crates/vm/src/builtins/type.rs index 926dc26843e..7acab9fed8c 100644 --- a/crates/vm/src/builtins/type.rs +++ b/crates/vm/src/builtins/type.rs @@ -233,6 +233,9 @@ unsafe impl crate::object::Traverse for PyType { .iter() .map(|(_, v)| v.traverse(tracer_fn)) .count(); + if let Some(ext) = self.heaptype_ext.as_ref() { + ext.specialization_init.read().traverse(tracer_fn); + } } /// type_clear: break reference cycles in type objects @@ -260,6 +263,12 @@ unsafe impl crate::object::Traverse for PyType { out.push(val); } } + if let Some(ext) = self.heaptype_ext.as_ref() + && let Some(mut guard) = ext.specialization_init.try_write() + && let Some(init) = guard.take() + { + out.push(init.into()); + } } } @@ -270,6 +279,8 @@ pub struct HeapTypeExt { pub slots: Option>>, pub type_data: PyRwLock>, pub specialization_init: PyRwLock>>, + pub specialization_getitem: PyRwLock>>, + pub specialization_getitem_version: AtomicU32, } pub struct PointerSlot(NonNull); @@ -399,6 +410,9 @@ impl PyType { pub fn modified(&self) { if let Some(ext) = self.heaptype_ext.as_ref() { *ext.specialization_init.write() = None; + *ext.specialization_getitem.write() = None; + ext.specialization_getitem_version + .store(0, Ordering::Release); } // If already invalidated, all subclasses must also be invalidated // (guaranteed by the MRO invariant in assign_version_tag). @@ -455,6 +469,8 @@ impl PyType { slots: None, type_data: PyRwLock::new(None), specialization_init: PyRwLock::new(None), + specialization_getitem: PyRwLock::new(None), + specialization_getitem_version: AtomicU32::new(0), }; let base = bases[0].clone(); @@ -682,6 +698,7 @@ impl PyType { // slots are fully initialized by make_slots() Self::set_new(&new_type.slots, &new_type.base); + Self::set_alloc(&new_type.slots, &new_type.base); let weakref_type = super::PyWeak::static_type(); for base in new_type.bases.read().iter() { @@ -728,6 +745,7 @@ impl PyType { } Self::set_new(&self.slots, &self.base); + Self::set_alloc(&self.slots, &self.base); } fn set_new(slots: &PyTypeSlots, base: &Option) { @@ -742,6 +760,16 @@ impl PyType { } } + fn set_alloc(slots: &PyTypeSlots, base: &Option) { + if slots.alloc.load().is_none() { + slots.alloc.store( + base.as_ref() + .map(|base| base.slots.alloc.load()) + .unwrap_or(None), + ); + } + } + /// Inherit readonly slots from base type at creation time. /// These slots are not AtomicCell and must be set before the type is used. fn inherit_readonly_slots(slots: &mut PyTypeSlots, base: &Self) { @@ -795,10 +823,14 @@ impl PyType { let Some(ext) = self.heaptype_ext.as_ref() else { return false; }; - if tp_version == 0 || self.tp_version_tag.load(Ordering::Acquire) != tp_version { + if tp_version == 0 { + return false; + } + let mut guard = ext.specialization_init.write(); + if self.tp_version_tag.load(Ordering::Acquire) != tp_version { return false; } - *ext.specialization_init.write() = Some(init); + *guard = Some(init); true } @@ -807,14 +839,59 @@ impl PyType { &self, tp_version: u32, ) -> Option> { + let ext = self.heaptype_ext.as_ref()?; + if tp_version == 0 { + return None; + } + let guard = ext.specialization_init.read(); + if self.tp_version_tag.load(Ordering::Acquire) != tp_version { + return None; + } + guard + .as_ref() + .map(|init| init.to_owned()) + } + + /// Cache __getitem__ for BINARY_OP_SUBSCR_GETITEM specialization. + /// The cache is valid only when guarded by the type version check. + pub(crate) fn cache_getitem_for_specialization( + &self, + getitem: PyRef, + tp_version: u32, + func_version: u32, + ) -> bool { + let Some(ext) = self.heaptype_ext.as_ref() else { + return false; + }; + if tp_version == 0 + || func_version == 0 + || self.tp_version_tag.load(Ordering::Acquire) != tp_version + { + return false; + } + *ext.specialization_getitem.write() = Some(getitem); + ext.specialization_getitem_version + .store(func_version, Ordering::Release); + true + } + + /// Read cached __getitem__ for BINARY_OP_SUBSCR_GETITEM specialization. + pub(crate) fn get_cached_getitem_for_specialization( + &self, + tp_version: u32, + ) -> Option<(PyRef, u32)> { let ext = self.heaptype_ext.as_ref()?; if tp_version == 0 || self.tp_version_tag.load(Ordering::Acquire) != tp_version { return None; } - ext.specialization_init + let cached_version = ext.specialization_getitem_version.load(Ordering::Acquire); + if cached_version == 0 { + return None; + } + ext.specialization_getitem .read() .as_ref() - .map(|init| init.to_owned()) + .map(|getitem| (getitem.to_owned(), cached_version)) } pub fn get_direct_attr(&self, attr_name: &'static PyStrInterned) -> Option { @@ -1920,6 +1997,8 @@ impl Constructor for PyType { slots: heaptype_slots.clone(), type_data: PyRwLock::new(None), specialization_init: PyRwLock::new(None), + specialization_getitem: PyRwLock::new(None), + specialization_getitem_version: AtomicU32::new(0), }; (slots, heaptype_ext) }; diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index bd894b2b0df..c8ff082f672 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -3222,9 +3222,6 @@ impl ExecutingFrame<'_> { } } } - self.deoptimize(Instruction::Send { - delta: Arg::marker(), - }); let receiver = self.top_value(); match self._send(receiver, val, vm)? { PyIterReturn::Return(value) => { @@ -3321,8 +3318,7 @@ impl ExecutingFrame<'_> { // Specialized LOAD_ATTR opcodes Instruction::LoadAttrMethodNoDict => { let oparg = LoadAttr::new(u32::from(arg)); - let instr_idx = self.lasti() as usize - 1; - let cache_base = instr_idx + 1; + let cache_base = self.lasti() as usize; let owner = self.top_value(); let type_version = self.code.instructions.read_cache_u32(cache_base + 1); @@ -3336,20 +3332,12 @@ impl ExecutingFrame<'_> { self.push_value(owner); Ok(None) } else { - self.deoptimize_at( - Instruction::LoadAttr { - namei: Arg::marker(), - }, - instr_idx, - cache_base, - ); self.load_attr_slow(vm, oparg) } } Instruction::LoadAttrMethodLazyDict => { let oparg = LoadAttr::new(u32::from(arg)); - let instr_idx = self.lasti() as usize - 1; - let cache_base = instr_idx + 1; + let cache_base = self.lasti() as usize; let owner = self.top_value(); let type_version = self.code.instructions.read_cache_u32(cache_base + 1); @@ -3364,20 +3352,12 @@ impl ExecutingFrame<'_> { self.push_value(owner); Ok(None) } else { - self.deoptimize_at( - Instruction::LoadAttr { - namei: Arg::marker(), - }, - instr_idx, - cache_base, - ); self.load_attr_slow(vm, oparg) } } Instruction::LoadAttrMethodWithValues => { let oparg = LoadAttr::new(u32::from(arg)); - let instr_idx = self.lasti() as usize - 1; - let cache_base = instr_idx + 1; + let cache_base = self.lasti() as usize; let attr_name = self.code.names[oparg.name_idx() as usize]; let owner = self.top_value(); @@ -3390,23 +3370,7 @@ impl ExecutingFrame<'_> { Ok(Some(_)) => true, Ok(None) => false, Err(_) => { - // Dict lookup error → deoptimize to safe path - unsafe { - self.code.instructions.replace_op( - instr_idx, - Instruction::LoadAttr { - namei: Arg::marker(), - }, - ); - self.code.instructions.write_adaptive_counter( - cache_base, - bytecode::adaptive_counter_backoff( - self.code - .instructions - .read_adaptive_counter(cache_base), - ), - ); - } + // Dict lookup error -> use safe path. return self.load_attr_slow(vm, oparg); } } @@ -3424,19 +3388,11 @@ impl ExecutingFrame<'_> { return Ok(None); } } - self.deoptimize_at( - Instruction::LoadAttr { - namei: Arg::marker(), - }, - instr_idx, - cache_base, - ); self.load_attr_slow(vm, oparg) } Instruction::LoadAttrInstanceValue => { let oparg = LoadAttr::new(u32::from(arg)); - let instr_idx = self.lasti() as usize - 1; - let cache_base = instr_idx + 1; + let cache_base = self.lasti() as usize; let attr_name = self.code.names[oparg.name_idx() as usize]; let owner = self.top_value(); @@ -3454,19 +3410,11 @@ impl ExecutingFrame<'_> { } // Not in instance dict — fall through to class lookup via slow path } - self.deoptimize_at( - Instruction::LoadAttr { - namei: Arg::marker(), - }, - instr_idx, - cache_base, - ); self.load_attr_slow(vm, oparg) } Instruction::LoadAttrWithHint => { let oparg = LoadAttr::new(u32::from(arg)); - let instr_idx = self.lasti() as usize - 1; - let cache_base = instr_idx + 1; + let cache_base = self.lasti() as usize; let attr_name = self.code.names[oparg.name_idx() as usize]; let owner = self.top_value(); @@ -3487,19 +3435,11 @@ impl ExecutingFrame<'_> { return Ok(None); } - self.deoptimize_at( - Instruction::LoadAttr { - namei: Arg::marker(), - }, - instr_idx, - cache_base, - ); self.load_attr_slow(vm, oparg) } Instruction::LoadAttrModule => { let oparg = LoadAttr::new(u32::from(arg)); - let instr_idx = self.lasti() as usize - 1; - let cache_base = instr_idx + 1; + let cache_base = self.lasti() as usize; let attr_name = self.code.names[oparg.name_idx() as usize]; let owner = self.top_value(); @@ -3519,27 +3459,11 @@ impl ExecutingFrame<'_> { } return Ok(None); } - // Deoptimize - unsafe { - self.code.instructions.replace_op( - instr_idx, - Instruction::LoadAttr { - namei: Arg::marker(), - }, - ); - self.code.instructions.write_adaptive_counter( - cache_base, - bytecode::adaptive_counter_backoff( - self.code.instructions.read_adaptive_counter(cache_base), - ), - ); - } self.load_attr_slow(vm, oparg) } Instruction::LoadAttrNondescriptorNoDict => { let oparg = LoadAttr::new(u32::from(arg)); - let instr_idx = self.lasti() as usize - 1; - let cache_base = instr_idx + 1; + let cache_base = self.lasti() as usize; let owner = self.top_value(); let type_version = self.code.instructions.read_cache_u32(cache_base + 1); @@ -3557,26 +3481,11 @@ impl ExecutingFrame<'_> { } return Ok(None); } - unsafe { - self.code.instructions.replace_op( - instr_idx, - Instruction::LoadAttr { - namei: Arg::marker(), - }, - ); - self.code.instructions.write_adaptive_counter( - cache_base, - bytecode::adaptive_counter_backoff( - self.code.instructions.read_adaptive_counter(cache_base), - ), - ); - } self.load_attr_slow(vm, oparg) } Instruction::LoadAttrNondescriptorWithValues => { let oparg = LoadAttr::new(u32::from(arg)); - let instr_idx = self.lasti() as usize - 1; - let cache_base = instr_idx + 1; + let cache_base = self.lasti() as usize; let attr_name = self.code.names[oparg.name_idx() as usize]; let owner = self.top_value(); @@ -3599,13 +3508,6 @@ impl ExecutingFrame<'_> { // Not in instance dict — use cached class attr let Some(attr) = self.try_read_cached_descriptor(cache_base, type_version) else { - self.deoptimize_at( - Instruction::LoadAttr { - namei: Arg::marker(), - }, - instr_idx, - cache_base, - ); return self.load_attr_slow(vm, oparg); }; self.pop_value(); @@ -3617,26 +3519,11 @@ impl ExecutingFrame<'_> { } return Ok(None); } - unsafe { - self.code.instructions.replace_op( - instr_idx, - Instruction::LoadAttr { - namei: Arg::marker(), - }, - ); - self.code.instructions.write_adaptive_counter( - cache_base, - bytecode::adaptive_counter_backoff( - self.code.instructions.read_adaptive_counter(cache_base), - ), - ); - } self.load_attr_slow(vm, oparg) } Instruction::LoadAttrClass => { let oparg = LoadAttr::new(u32::from(arg)); - let instr_idx = self.lasti() as usize - 1; - let cache_base = instr_idx + 1; + let cache_base = self.lasti() as usize; let owner = self.top_value(); let type_version = self.code.instructions.read_cache_u32(cache_base + 1); @@ -3655,26 +3542,11 @@ impl ExecutingFrame<'_> { } return Ok(None); } - unsafe { - self.code.instructions.replace_op( - instr_idx, - Instruction::LoadAttr { - namei: Arg::marker(), - }, - ); - self.code.instructions.write_adaptive_counter( - cache_base, - bytecode::adaptive_counter_backoff( - self.code.instructions.read_adaptive_counter(cache_base), - ), - ); - } self.load_attr_slow(vm, oparg) } Instruction::LoadAttrClassWithMetaclassCheck => { let oparg = LoadAttr::new(u32::from(arg)); - let instr_idx = self.lasti() as usize - 1; - let cache_base = instr_idx + 1; + let cache_base = self.lasti() as usize; let owner = self.top_value(); let type_version = self.code.instructions.read_cache_u32(cache_base + 1); @@ -3696,26 +3568,38 @@ impl ExecutingFrame<'_> { } return Ok(None); } - self.deoptimize_at( - Instruction::LoadAttr { - namei: Arg::marker(), - }, - instr_idx, - cache_base, - ); self.load_attr_slow(vm, oparg) } Instruction::LoadAttrGetattributeOverridden => { let oparg = LoadAttr::new(u32::from(arg)); - self.deoptimize(Instruction::LoadAttr { - namei: Arg::marker(), - }); + let cache_base = self.lasti() as usize; + let owner = self.top_value(); + let type_version = self.code.instructions.read_cache_u32(cache_base + 1); + let func_version = self.code.instructions.read_cache_u32(cache_base + 3); + + if !oparg.is_method() + && !self.specialization_eval_frame_active(vm) + && type_version != 0 + && func_version != 0 + && owner.class().tp_version_tag.load(Acquire) == type_version + && let Some(func_obj) = + self.try_read_cached_descriptor(cache_base, type_version) + && let Some(func) = func_obj.downcast_ref_if_exact::(vm) + && func.func_version() == func_version + && func.has_exact_argcount(2) + && self.specialization_has_datastack_space_for_func(vm, func) + { + let owner = self.pop_value(); + let attr_name = self.code.names[oparg.name_idx() as usize].to_owned().into(); + let result = func.invoke_exact_args(vec![owner, attr_name], vm)?; + self.push_value(result); + return Ok(None); + } self.load_attr_slow(vm, oparg) } Instruction::LoadAttrSlot => { let oparg = LoadAttr::new(u32::from(arg)); - let instr_idx = self.lasti() as usize - 1; - let cache_base = instr_idx + 1; + let cache_base = self.lasti() as usize; let owner = self.top_value(); let type_version = self.code.instructions.read_cache_u32(cache_base + 1); @@ -3735,55 +3619,29 @@ impl ExecutingFrame<'_> { } // Slot is None → AttributeError (fall through to slow path) } - unsafe { - self.code.instructions.replace_op( - instr_idx, - Instruction::LoadAttr { - namei: Arg::marker(), - }, - ); - self.code.instructions.write_adaptive_counter( - cache_base, - bytecode::adaptive_counter_backoff( - self.code.instructions.read_adaptive_counter(cache_base), - ), - ); - } self.load_attr_slow(vm, oparg) } Instruction::LoadAttrProperty => { let oparg = LoadAttr::new(u32::from(arg)); - let instr_idx = self.lasti() as usize - 1; - let cache_base = instr_idx + 1; + let cache_base = self.lasti() as usize; let owner = self.top_value(); let type_version = self.code.instructions.read_cache_u32(cache_base + 1); if type_version != 0 + && !self.specialization_eval_frame_active(vm) && owner.class().tp_version_tag.load(Acquire) == type_version - && let Some(descr) = self.try_read_cached_descriptor(cache_base, type_version) - && let Some(prop) = descr.downcast_ref::() - && let Some(getter) = prop.get_fget() + && let Some(fget_obj) = + self.try_read_cached_descriptor(cache_base, type_version) + && let Some(func) = fget_obj.downcast_ref_if_exact::(vm) + && func.can_specialize_call(1) + && self.specialization_has_datastack_space_for_func(vm, func) { let owner = self.pop_value(); - let result = getter.call((owner,), vm)?; + let result = func.invoke_exact_args(vec![owner], vm)?; self.push_value(result); return Ok(None); } - unsafe { - self.code.instructions.replace_op( - instr_idx, - Instruction::LoadAttr { - namei: Arg::marker(), - }, - ); - self.code.instructions.write_adaptive_counter( - cache_base, - bytecode::adaptive_counter_backoff( - self.code.instructions.read_adaptive_counter(cache_base), - ), - ); - } self.load_attr_slow(vm, oparg) } Instruction::StoreAttrInstanceValue => { @@ -3912,7 +3770,30 @@ impl ExecutingFrame<'_> { self.execute_bin_op(vm, bytecode::BinaryOperator::Add) } } - Instruction::BinaryOpSubscrGetitem | Instruction::BinaryOpExtend => { + Instruction::BinaryOpSubscrGetitem => { + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let owner = self.nth_value(1); + let type_version = self.code.instructions.read_cache_u32(cache_base + 1); + if !self.specialization_eval_frame_active(vm) + && type_version != 0 + && owner.class().tp_version_tag.load(Acquire) == type_version + && let Some((func, func_version)) = owner + .class() + .get_cached_getitem_for_specialization(type_version) + && func.func_version() == func_version + && func.has_exact_argcount(2) + && self.specialization_has_datastack_space_for_func(vm, &func) + { + let sub = self.pop_value(); + let owner = self.pop_value(); + let result = func.invoke_exact_args(vec![owner, sub], vm)?; + self.push_value(result); + return Ok(None); + } + self.execute_bin_op(vm, bytecode::BinaryOperator::Subscr) + } + Instruction::BinaryOpExtend => { let op = bytecode::BinaryOperator::try_from(u32::from(arg)) .unwrap_or(bytecode::BinaryOperator::Subscr); self.execute_bin_op(vm, op) @@ -4026,7 +3907,7 @@ impl ExecutingFrame<'_> { if self.specialization_eval_frame_active(vm) { return self.execute_call_vectorcall(nargs, vm); } - if vm.reached_c_stack_limit() || self.specialization_call_recursion_guard(vm) { + if self.specialization_call_recursion_guard(vm) { return self.execute_call_vectorcall(nargs, vm); } // Stack: [callable, self_or_null, arg1, ..., argN] @@ -4041,7 +3922,10 @@ impl ExecutingFrame<'_> { && cached_version != 0 { let effective_nargs = nargs + u32::from(self_or_null_is_some); - if !func.can_specialize_call(effective_nargs) { + if !func.has_exact_argcount(effective_nargs) { + return self.execute_call_vectorcall(nargs, vm); + } + if !self.specialization_has_datastack_space_for_func(vm, func) { return self.execute_call_vectorcall(nargs, vm); } let pos_args: Vec = self.pop_multiple(nargs as usize).collect(); @@ -4071,7 +3955,7 @@ impl ExecutingFrame<'_> { if self.specialization_eval_frame_active(vm) { return self.execute_call_vectorcall(nargs, vm); } - if vm.reached_c_stack_limit() || self.specialization_call_recursion_guard(vm) { + if self.specialization_call_recursion_guard(vm) { return self.execute_call_vectorcall(nargs, vm); } // Stack: [callable, self_or_null(NULL), arg1, ..., argN] @@ -4090,7 +3974,10 @@ impl ExecutingFrame<'_> { && func.func_version() == cached_version && cached_version != 0 { - if !func.can_specialize_call(nargs + 1) { + if !func.has_exact_argcount(nargs + 1) { + return self.execute_call_vectorcall(nargs, vm); + } + if !self.specialization_has_datastack_space_for_func(vm, func) { return self.execute_call_vectorcall(nargs, vm); } let pos_args: Vec = @@ -4239,9 +4126,6 @@ impl ExecutingFrame<'_> { | PyMethodFlags::O | PyMethodFlags::KEYWORDS); if call_conv == PyMethodFlags::O && effective_nargs == 1 { - if vm.reached_c_stack_limit() { - return self.execute_call_vectorcall(nargs, vm); - } let nargs_usize = nargs as usize; let pos_args: Vec = self.pop_multiple(nargs_usize).collect(); let self_or_null = self.pop_value_opt(); @@ -4443,9 +4327,6 @@ impl ExecutingFrame<'_> { .as_ref() .is_some_and(|self_obj| self_obj.class().is(descr.objclass)) { - if vm.reached_c_stack_limit() { - return self.execute_call_vectorcall(nargs, vm); - } let func = descr.method.func; let positional_args: Vec = self.pop_multiple(nargs as usize).collect(); @@ -4494,9 +4375,6 @@ impl ExecutingFrame<'_> { .as_ref() .is_some_and(|self_obj| self_obj.class().is(descr.objclass)) { - if vm.reached_c_stack_limit() { - return self.execute_call_vectorcall(nargs, vm); - } let func = descr.method.func; let positional_args: Vec = self.pop_multiple(nargs as usize).collect(); @@ -4603,28 +4481,23 @@ impl ExecutingFrame<'_> { .localsplus .stack_index(stack_len - nargs as usize - 1) .is_some(); + let object_alloc = vm.ctx.types.object_type.slots.alloc.load(); if !self.specialization_eval_frame_active(vm) && !self_or_null_is_some && cached_version != 0 && let Some(cls) = callable.downcast_ref::() && cls.tp_version_tag.load(Acquire) == cached_version && let Some(init_func) = cls.get_cached_init_for_specialization(cached_version) + && let (Some(cls_alloc), Some(object_alloc_fn)) = + (cls.slots.alloc.load(), object_alloc) + && cls_alloc as usize == object_alloc_fn as usize { - if vm.reached_c_stack_limit() { + if !self.specialization_has_datastack_space_for_func(vm, &init_func) { return self.execute_call_vectorcall(nargs, vm); } - // Allocate object directly (tp_new == object.__new__) - let dict = if cls - .slots - .flags - .has_feature(crate::types::PyTypeFlags::HAS_DICT) - { - Some(vm.ctx.new_dict()) - } else { - None - }; + // Allocate object directly (tp_new == object.__new__, tp_alloc == generic). let cls_ref = cls.to_owned(); - let new_obj: PyObjectRef = PyRef::new_ref(PyBaseObject, cls_ref, dict).into(); + let new_obj = cls_alloc(cls_ref, 0, vm)?; // Build args: [new_obj, arg1, ..., argN] let pos_args: Vec = self.pop_multiple(nargs as usize).collect(); @@ -4635,15 +4508,7 @@ impl ExecutingFrame<'_> { all_args.push(new_obj.clone()); all_args.extend(pos_args); - let init_result = if init_func.can_specialize_call(all_args.len() as u32) { - init_func.invoke_exact_args(all_args, vm)? - } else { - let args = FuncArgs { - args: all_args, - kwargs: Default::default(), - }; - init_func.invoke(args, vm)? - }; + let init_result = init_func.invoke_exact_args(all_args, vm)?; // EXIT_INIT_CHECK: __init__ must return None if !vm.is_none(&init_result) { @@ -4965,22 +4830,6 @@ impl ExecutingFrame<'_> { return Ok(None); } } - // Deoptimize - unsafe { - self.code.instructions.replace_op( - self.lasti() as usize - 1, - Instruction::LoadSuperAttr { - namei: Arg::marker(), - }, - ); - let cache_base = self.lasti() as usize; - self.code.instructions.write_adaptive_counter( - cache_base, - bytecode::adaptive_counter_backoff( - self.code.instructions.read_adaptive_counter(cache_base), - ), - ); - } let oparg = LoadSuperAttr::new(oparg); self.load_super_attr(vm, oparg) } @@ -5048,22 +4897,6 @@ impl ExecutingFrame<'_> { return Ok(None); } } - // Deoptimize - unsafe { - self.code.instructions.replace_op( - self.lasti() as usize - 1, - Instruction::LoadSuperAttr { - namei: Arg::marker(), - }, - ); - let cache_base = self.lasti() as usize; - self.code.instructions.write_adaptive_counter( - cache_base, - bytecode::adaptive_counter_backoff( - self.code.instructions.read_adaptive_counter(cache_base), - ), - ); - } let oparg = LoadSuperAttr::new(oparg); self.load_super_attr(vm, oparg) } @@ -5333,9 +5166,6 @@ impl ExecutingFrame<'_> { } Ok(None) } else { - self.deoptimize(Instruction::ForIter { - delta: Arg::marker(), - }); self.execute_for_iter(vm, target)?; Ok(None) } @@ -5351,9 +5181,6 @@ impl ExecutingFrame<'_> { } Ok(None) } else { - self.deoptimize(Instruction::ForIter { - delta: Arg::marker(), - }); self.execute_for_iter(vm, target)?; Ok(None) } @@ -5369,9 +5196,6 @@ impl ExecutingFrame<'_> { } Ok(None) } else { - self.deoptimize(Instruction::ForIter { - delta: Arg::marker(), - }); self.execute_for_iter(vm, target)?; Ok(None) } @@ -5380,17 +5204,11 @@ impl ExecutingFrame<'_> { let target = bytecode::Label(self.lasti() + 1 + u32::from(arg)); let iter = self.top_value(); if self.specialization_eval_frame_active(vm) { - self.deoptimize(Instruction::ForIter { - delta: Arg::marker(), - }); self.execute_for_iter(vm, target)?; return Ok(None); } if let Some(generator) = iter.downcast_ref_if_exact::(vm) { if generator.as_coro().running() || generator.as_coro().closed() { - self.deoptimize(Instruction::ForIter { - delta: Arg::marker(), - }); self.execute_for_iter(vm, target)?; return Ok(None); } @@ -5409,9 +5227,6 @@ impl ExecutingFrame<'_> { } Ok(None) } else { - self.deoptimize(Instruction::ForIter { - delta: Arg::marker(), - }); self.execute_for_iter(vm, target)?; Ok(None) } @@ -7157,6 +6972,35 @@ impl ExecutingFrame<'_> { .load() .is_some_and(|f| f as usize == PyBaseObject::getattro as *const () as usize); if !is_default_getattro { + let mut type_version = cls.tp_version_tag.load(Acquire); + if type_version == 0 { + type_version = cls.assign_version_tag(); + } + if type_version != 0 + && !oparg.is_method() + && !self.specialization_eval_frame_active(_vm) + && cls.get_attr(identifier!(_vm, __getattr__)).is_none() + && let Some(getattribute) = cls.get_attr(identifier!(_vm, __getattribute__)) + && let Some(func) = getattribute.downcast_ref_if_exact::(_vm) + && func.can_specialize_call(2) + { + let func_version = func.get_version_for_current_state(); + if func_version != 0 { + let func_ptr = &*getattribute as *const PyObject as usize; + unsafe { + self.code + .instructions + .write_cache_u32(cache_base + 3, func_version); + self.write_cached_descriptor(cache_base, type_version, func_ptr); + } + self.specialize_at( + instr_idx, + cache_base, + Instruction::LoadAttrGetattributeOverridden, + ); + return; + } + } unsafe { self.code.instructions.write_adaptive_counter( cache_base, @@ -7263,12 +7107,16 @@ impl ExecutingFrame<'_> { } self.specialize_at(instr_idx, cache_base, Instruction::LoadAttrSlot); } else if let Some(ref descr) = cls_attr - && descr.downcast_ref::().is_some() + && let Some(prop) = descr.downcast_ref::() + && let Some(fget) = prop.get_fget() + && let Some(func) = fget.downcast_ref_if_exact::(_vm) + && func.can_specialize_call(1) + && !self.specialization_eval_frame_active(_vm) { - // Property descriptor — cache the property object pointer - let descr_ptr = &**descr as *const PyObject as usize; + // Property specialization caches fget directly, matching CPython. + let fget_ptr = &*fget as *const PyObject as usize; unsafe { - self.write_cached_descriptor(cache_base, type_version, descr_ptr); + self.write_cached_descriptor(cache_base, type_version, fget_ptr); } self.specialize_at(instr_idx, cache_base, Instruction::LoadAttrProperty); } else { @@ -7578,7 +7426,39 @@ impl ExecutingFrame<'_> { { Some(Instruction::BinaryOpSubscrListSlice) } else { - None + let cls = a.class(); + if cls.slots.flags.has_feature(PyTypeFlags::HEAPTYPE) + && !self.specialization_eval_frame_active(vm) + && let Some(_getitem) = cls.get_attr(identifier!(vm, __getitem__)) + && let Some(func) = _getitem.downcast_ref_if_exact::(vm) + && func.can_specialize_call(2) + { + let mut type_version = cls.tp_version_tag.load(Acquire); + if type_version == 0 { + type_version = cls.assign_version_tag(); + } + if type_version != 0 { + let func_version = func.get_version_for_current_state(); + if cls.cache_getitem_for_specialization( + func.to_owned(), + type_version, + func_version, + ) { + unsafe { + self.code + .instructions + .write_cache_u32(cache_base + 1, type_version); + } + Some(Instruction::BinaryOpSubscrGetitem) + } else { + None + } + } else { + None + } + } else { + None + } } } bytecode::BinaryOperator::InplaceAdd => { @@ -7647,30 +7527,6 @@ impl ExecutingFrame<'_> { } } - /// Deoptimize: replace specialized op with its base adaptive op and reset - /// the adaptive counter. Computes instr_idx/cache_base from lasti(). - #[inline] - fn deoptimize(&mut self, base_op: Instruction) { - let instr_idx = self.lasti() as usize - 1; - let cache_base = instr_idx + 1; - self.deoptimize_at(base_op, instr_idx, cache_base); - } - - /// Deoptimize with explicit indices (for specialized handlers that already - /// have instr_idx/cache_base in scope). - #[inline] - fn deoptimize_at(&mut self, base_op: Instruction, instr_idx: usize, cache_base: usize) { - unsafe { - self.code.instructions.replace_op(instr_idx, base_op); - self.code.instructions.write_adaptive_counter( - cache_base, - bytecode::adaptive_counter_backoff( - self.code.instructions.read_adaptive_counter(cache_base), - ), - ); - } - } - /// Execute a specialized binary op on two int operands. /// Fallback to generic binary op if either operand is not an exact int. #[inline] @@ -8031,13 +7887,20 @@ impl ExecutingFrame<'_> { if !self_or_null_is_some && cls.slots.flags.has_feature(PyTypeFlags::HEAPTYPE) { let object_new = vm.ctx.types.object_type.slots.new.load(); let cls_new = cls.slots.new.load(); - if let (Some(cls_new_fn), Some(obj_new_fn)) = (cls_new, object_new) + let object_alloc = vm.ctx.types.object_type.slots.alloc.load(); + let cls_alloc = cls.slots.alloc.load(); + if let (Some(cls_new_fn), Some(obj_new_fn), Some(cls_alloc_fn), Some(obj_alloc_fn)) = + (cls_new, object_new, cls_alloc, object_alloc) && cls_new_fn as usize == obj_new_fn as usize + && cls_alloc_fn as usize == obj_alloc_fn as usize && let Some(init) = cls.get_attr(identifier!(vm, __init__)) && let Some(init_func) = init.downcast_ref_if_exact::(vm) && init_func.is_simple_for_call_specialization() { - let version = cls.tp_version_tag.load(Acquire); + let mut version = cls.tp_version_tag.load(Acquire); + if version == 0 { + version = cls.assign_version_tag(); + } if version != 0 && cls.cache_init_for_specialization(init_func.to_owned(), version) { @@ -8396,6 +8259,18 @@ impl ExecutingFrame<'_> { vm.use_tracing.get() } + #[inline] + fn specialization_has_datastack_space_for_func( + &self, + vm: &VirtualMachine, + func: &Py, + ) -> bool { + match func.datastack_frame_size_bytes() { + Some(frame_size) => vm.datastack_has_space(frame_size), + None => true, + } + } + #[inline] fn specialization_call_recursion_guard(&self, vm: &VirtualMachine) -> bool { vm.current_recursion_depth().saturating_add(1) >= vm.recursion_limit.get() diff --git a/crates/vm/src/types/slot.rs b/crates/vm/src/types/slot.rs index 60d10194f30..bd390af9546 100644 --- a/crates/vm/src/types/slot.rs +++ b/crates/vm/src/types/slot.rs @@ -174,6 +174,7 @@ pub struct PyTypeSlots { // tp_dictoffset pub init: AtomicCell>, // tp_alloc + pub alloc: AtomicCell>, pub new: AtomicCell>, // tp_free // tp_is_gc @@ -298,6 +299,7 @@ pub(crate) type DescrGetFunc = fn(PyObjectRef, Option, Option, &VirtualMachine) -> PyResult; pub(crate) type DescrSetFunc = fn(&PyObject, PyObjectRef, PySetterValue, &VirtualMachine) -> PyResult<()>; +pub(crate) type AllocFunc = fn(PyTypeRef, usize, &VirtualMachine) -> PyResult; pub(crate) type NewFunc = fn(PyTypeRef, FuncArgs, &VirtualMachine) -> PyResult; pub(crate) type InitFunc = fn(PyObjectRef, FuncArgs, &VirtualMachine) -> PyResult<()>; pub(crate) type DelFunc = fn(&PyObject, &VirtualMachine) -> PyResult<()>; diff --git a/crates/vm/src/vm/mod.rs b/crates/vm/src/vm/mod.rs index 0de1c1e2547..a56bc16cd38 100644 --- a/crates/vm/src/vm/mod.rs +++ b/crates/vm/src/vm/mod.rs @@ -94,6 +94,7 @@ pub struct VirtualMachine { pub initialized: bool, recursion_depth: Cell, /// C stack soft limit for detecting stack overflow (like c_stack_soft_limit) + #[cfg_attr(miri, allow(dead_code))] c_stack_soft_limit: Cell, /// Async generator firstiter hook (per-thread, set via sys.set_asyncgen_hooks) pub async_gen_firstiter: RefCell>, @@ -653,6 +654,12 @@ impl VirtualMachine { unsafe { (*self.datastack.get()).push(size) } } + /// Check whether the thread data stack currently has room for `size` bytes. + #[inline(always)] + pub(crate) fn datastack_has_space(&self, size: usize) -> bool { + unsafe { (*self.datastack.get()).has_space(size) } + } + /// Pop a previous data stack allocation. /// /// # Safety @@ -1414,6 +1421,7 @@ impl VirtualMachine { /// Stack margin bytes (like _PyOS_STACK_MARGIN_BYTES). /// 2048 * sizeof(void*) = 16KB for 64-bit. + #[cfg_attr(miri, allow(dead_code))] const STACK_MARGIN_BYTES: usize = 2048 * core::mem::size_of::(); /// Get the stack boundaries using platform-specific APIs. @@ -1522,11 +1530,6 @@ impl VirtualMachine { false } - #[inline(always)] - pub(crate) fn reached_c_stack_limit(&self) -> bool { - self.check_c_stack_overflow() - } - /// Used to run the body of a (possibly) recursive function. It will raise a /// RecursionError if recursive functions are nested far too many times, /// preventing a stack overflow. From 29f4284009c042ddd4ffa702f9fe28850db619b7 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Sun, 8 Mar 2026 21:13:45 +0900 Subject: [PATCH 3/6] vm: align call-alloc/getitem cache guards and call fastpath ordering --- crates/vm/src/builtins/type.rs | 21 ++--- crates/vm/src/frame.rs | 139 ++++++++++++++++----------------- 2 files changed, 74 insertions(+), 86 deletions(-) diff --git a/crates/vm/src/builtins/type.rs b/crates/vm/src/builtins/type.rs index 7acab9fed8c..a894ab84032 100644 --- a/crates/vm/src/builtins/type.rs +++ b/crates/vm/src/builtins/type.rs @@ -847,9 +847,7 @@ impl PyType { if self.tp_version_tag.load(Ordering::Acquire) != tp_version { return None; } - guard - .as_ref() - .map(|init| init.to_owned()) + guard.as_ref().map(|init| init.to_owned()) } /// Cache __getitem__ for BINARY_OP_SUBSCR_GETITEM specialization. @@ -858,15 +856,15 @@ impl PyType { &self, getitem: PyRef, tp_version: u32, - func_version: u32, ) -> bool { let Some(ext) = self.heaptype_ext.as_ref() else { return false; }; - if tp_version == 0 - || func_version == 0 - || self.tp_version_tag.load(Ordering::Acquire) != tp_version - { + if tp_version == 0 || self.tp_version_tag.load(Ordering::Acquire) != tp_version { + return false; + } + let func_version = getitem.get_version_for_current_state(); + if func_version == 0 { return false; } *ext.specialization_getitem.write() = Some(getitem); @@ -876,12 +874,9 @@ impl PyType { } /// Read cached __getitem__ for BINARY_OP_SUBSCR_GETITEM specialization. - pub(crate) fn get_cached_getitem_for_specialization( - &self, - tp_version: u32, - ) -> Option<(PyRef, u32)> { + pub(crate) fn get_cached_getitem_for_specialization(&self) -> Option<(PyRef, u32)> { let ext = self.heaptype_ext.as_ref()?; - if tp_version == 0 || self.tp_version_tag.load(Ordering::Acquire) != tp_version { + if self.tp_version_tag.load(Ordering::Acquire) == 0 { return None; } let cached_version = ext.specialization_getitem_version.load(Ordering::Acquire); diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index c8ff082f672..2d74e98e95e 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -3586,9 +3586,9 @@ impl ExecutingFrame<'_> { self.try_read_cached_descriptor(cache_base, type_version) && let Some(func) = func_obj.downcast_ref_if_exact::(vm) && func.func_version() == func_version - && func.has_exact_argcount(2) && self.specialization_has_datastack_space_for_func(vm, func) { + debug_assert!(func.has_exact_argcount(2)); let owner = self.pop_value(); let attr_name = self.code.names[oparg.name_idx() as usize].to_owned().into(); let result = func.invoke_exact_args(vec![owner, attr_name], vm)?; @@ -3771,20 +3771,14 @@ impl ExecutingFrame<'_> { } } Instruction::BinaryOpSubscrGetitem => { - let instr_idx = self.lasti() as usize - 1; - let cache_base = instr_idx + 1; let owner = self.nth_value(1); - let type_version = self.code.instructions.read_cache_u32(cache_base + 1); if !self.specialization_eval_frame_active(vm) - && type_version != 0 - && owner.class().tp_version_tag.load(Acquire) == type_version - && let Some((func, func_version)) = owner - .class() - .get_cached_getitem_for_specialization(type_version) + && let Some((func, func_version)) = + owner.class().get_cached_getitem_for_specialization() && func.func_version() == func_version - && func.has_exact_argcount(2) && self.specialization_has_datastack_space_for_func(vm, &func) { + debug_assert!(func.has_exact_argcount(2)); let sub = self.pop_value(); let owner = self.pop_value(); let result = func.invoke_exact_args(vec![owner, sub], vm)?; @@ -3804,19 +3798,17 @@ impl ExecutingFrame<'_> { if let (Some(list), Some(idx)) = ( a.downcast_ref_if_exact::(vm), b.downcast_ref_if_exact::(vm), - ) && let Ok(i) = idx.try_to_primitive::(vm) + ) && let Ok(i) = idx.try_to_primitive::(vm) { let vec = list.borrow_vec(); - if let Some(pos) = vec.wrap_index(i) { - let value = vec.do_get(pos); + if i < vec.len() { + let value = vec.do_get(i); drop(vec); self.pop_value(); self.pop_value(); self.push_value(value); return Ok(None); } - drop(vec); - return Err(vm.new_index_error("list index out of range")); } self.execute_bin_op(vm, bytecode::BinaryOperator::Subscr) } @@ -3826,17 +3818,16 @@ impl ExecutingFrame<'_> { if let (Some(tuple), Some(idx)) = ( a.downcast_ref_if_exact::(vm), b.downcast_ref_if_exact::(vm), - ) && let Ok(i) = idx.try_to_primitive::(vm) + ) && let Ok(i) = idx.try_to_primitive::(vm) { let elements = tuple.as_slice(); - if let Some(pos) = elements.wrap_index(i) { - let value = elements[pos].clone(); + if i < elements.len() { + let value = elements[i].clone(); self.pop_value(); self.pop_value(); self.push_value(value); return Ok(None); } - return Err(vm.new_index_error("tuple index out of range")); } self.execute_bin_op(vm, bytecode::BinaryOperator::Subscr) } @@ -3869,19 +3860,14 @@ impl ExecutingFrame<'_> { if let (Some(a_str), Some(b_int)) = ( a.downcast_ref_if_exact::(vm), b.downcast_ref_if_exact::(vm), - ) && let Ok(i) = b_int.try_to_primitive::(vm) + ) && let Ok(i) = b_int.try_to_primitive::(vm) + && let Ok(ch) = a_str.getitem_by_index(vm, i as isize) + && ch.is_ascii() { - match a_str.getitem_by_index(vm, i) { - Ok(ch) => { - self.pop_value(); - self.pop_value(); - self.push_value(PyStr::from(ch).into_pyobject(vm)); - return Ok(None); - } - Err(e) => { - return Err(e); - } - } + self.pop_value(); + self.pop_value(); + self.push_value(PyStr::from(ch).into_pyobject(vm)); + return Ok(None); } self.execute_bin_op(vm, bytecode::BinaryOperator::Subscr) } @@ -3907,9 +3893,6 @@ impl ExecutingFrame<'_> { if self.specialization_eval_frame_active(vm) { return self.execute_call_vectorcall(nargs, vm); } - if self.specialization_call_recursion_guard(vm) { - return self.execute_call_vectorcall(nargs, vm); - } // Stack: [callable, self_or_null, arg1, ..., argN] let stack_len = self.localsplus.stack_len(); let self_or_null_is_some = self @@ -3928,6 +3911,9 @@ impl ExecutingFrame<'_> { if !self.specialization_has_datastack_space_for_func(vm, func) { return self.execute_call_vectorcall(nargs, vm); } + if self.specialization_call_recursion_guard(vm) { + return self.execute_call_vectorcall(nargs, vm); + } let pos_args: Vec = self.pop_multiple(nargs as usize).collect(); let self_or_null = self.pop_value_opt(); let callable = self.pop_value(); @@ -3955,9 +3941,6 @@ impl ExecutingFrame<'_> { if self.specialization_eval_frame_active(vm) { return self.execute_call_vectorcall(nargs, vm); } - if self.specialization_call_recursion_guard(vm) { - return self.execute_call_vectorcall(nargs, vm); - } // Stack: [callable, self_or_null(NULL), arg1, ..., argN] let stack_len = self.localsplus.stack_len(); let self_or_null_is_some = self @@ -3980,6 +3963,9 @@ impl ExecutingFrame<'_> { if !self.specialization_has_datastack_space_for_func(vm, func) { return self.execute_call_vectorcall(nargs, vm); } + if self.specialization_call_recursion_guard(vm) { + return self.execute_call_vectorcall(nargs, vm); + } let pos_args: Vec = self.pop_multiple(nargs as usize).collect(); self.pop_value_opt(); // null (self_or_null) @@ -4185,14 +4171,14 @@ impl ExecutingFrame<'_> { if self.specialization_eval_frame_active(vm) { return self.execute_call_vectorcall(nargs, vm); } - if self.specialization_call_recursion_guard(vm) { - return self.execute_call_vectorcall(nargs, vm); - } let callable = self.nth_value(nargs + 1); if let Some(func) = callable.downcast_ref_if_exact::(vm) && func.func_version() == cached_version && cached_version != 0 { + if self.specialization_call_recursion_guard(vm) { + return self.execute_call_vectorcall(nargs, vm); + } let nargs_usize = nargs as usize; let pos_args: Vec = self.pop_multiple(nargs_usize).collect(); let self_or_null = self.pop_value_opt(); @@ -4221,9 +4207,6 @@ impl ExecutingFrame<'_> { if self.specialization_eval_frame_active(vm) { return self.execute_call_vectorcall(nargs, vm); } - if self.specialization_call_recursion_guard(vm) { - return self.execute_call_vectorcall(nargs, vm); - } let stack_len = self.localsplus.stack_len(); let self_or_null_is_some = self .localsplus @@ -4239,6 +4222,9 @@ impl ExecutingFrame<'_> { && func.func_version() == cached_version && cached_version != 0 { + if self.specialization_call_recursion_guard(vm) { + return self.execute_call_vectorcall(nargs, vm); + } let nargs_usize = nargs as usize; let pos_args: Vec = self.pop_multiple(nargs_usize).collect(); self.pop_value_opt(); // null (self_or_null) @@ -4481,18 +4467,23 @@ impl ExecutingFrame<'_> { .localsplus .stack_index(stack_len - nargs as usize - 1) .is_some(); - let object_alloc = vm.ctx.types.object_type.slots.alloc.load(); if !self.specialization_eval_frame_active(vm) && !self_or_null_is_some && cached_version != 0 && let Some(cls) = callable.downcast_ref::() && cls.tp_version_tag.load(Acquire) == cached_version && let Some(init_func) = cls.get_cached_init_for_specialization(cached_version) - && let (Some(cls_alloc), Some(object_alloc_fn)) = - (cls.slots.alloc.load(), object_alloc) - && cls_alloc as usize == object_alloc_fn as usize + && let Some(cls_alloc) = cls.slots.alloc.load() { - if !self.specialization_has_datastack_space_for_func(vm, &init_func) { + // CPython guards with code->co_framesize + _Py_InitCleanup.co_framesize. + // RustPython does not materialize frame-specials on datastack, so use + // only the cleanup shim's eval-stack payload (2 stack slots). + const INIT_CLEANUP_STACK_BYTES: usize = 2 * core::mem::size_of::(); + if !self.specialization_has_datastack_space_for_func_with_extra( + vm, + &init_func, + INIT_CLEANUP_STACK_BYTES, + ) { return self.execute_call_vectorcall(nargs, vm); } // Allocate object directly (tp_new == object.__new__, tp_alloc == generic). @@ -4508,7 +4499,10 @@ impl ExecutingFrame<'_> { all_args.push(new_obj.clone()); all_args.extend(pos_args); - let init_result = init_func.invoke_exact_args(all_args, vm)?; + let init_callable: PyObjectRef = init_func.into(); + let effective_nargs = all_args.len(); + let init_result = + vectorcall_function(&init_callable, all_args, effective_nargs, None, vm)?; // EXIT_INIT_CHECK: __init__ must return None if !vm.is_none(&init_result) { @@ -4648,15 +4642,15 @@ impl ExecutingFrame<'_> { if self.specialization_eval_frame_active(vm) { return self.execute_call_kw_vectorcall(nargs, vm); } - if self.specialization_call_recursion_guard(vm) { - return self.execute_call_kw_vectorcall(nargs, vm); - } // Stack: [callable, self_or_null, arg1, ..., argN, kwarg_names] let callable = self.nth_value(nargs + 2); if let Some(func) = callable.downcast_ref_if_exact::(vm) && func.func_version() == cached_version && cached_version != 0 { + if self.specialization_call_recursion_guard(vm) { + return self.execute_call_kw_vectorcall(nargs, vm); + } let nargs_usize = nargs as usize; let kwarg_names_obj = self.pop_value(); let kwarg_names_tuple = kwarg_names_obj @@ -7407,19 +7401,16 @@ impl ExecutingFrame<'_> { } } bytecode::BinaryOperator::Subscr => { - if a.downcast_ref_if_exact::(vm).is_some() - && b.downcast_ref_if_exact::(vm).is_some() - { + let b_is_nonnegative_int = b + .downcast_ref_if_exact::(vm) + .is_some_and(|i| i.try_to_primitive::(vm).is_ok()); + if a.downcast_ref_if_exact::(vm).is_some() && b_is_nonnegative_int { Some(Instruction::BinaryOpSubscrListInt) - } else if a.downcast_ref_if_exact::(vm).is_some() - && b.downcast_ref_if_exact::(vm).is_some() - { + } else if a.downcast_ref_if_exact::(vm).is_some() && b_is_nonnegative_int { Some(Instruction::BinaryOpSubscrTupleInt) } else if a.downcast_ref_if_exact::(vm).is_some() { Some(Instruction::BinaryOpSubscrDict) - } else if a.downcast_ref_if_exact::(vm).is_some() - && b.downcast_ref_if_exact::(vm).is_some() - { + } else if a.downcast_ref_if_exact::(vm).is_some() && b_is_nonnegative_int { Some(Instruction::BinaryOpSubscrStrInt) } else if a.downcast_ref_if_exact::(vm).is_some() && b.downcast_ref::().is_some() @@ -7438,17 +7429,7 @@ impl ExecutingFrame<'_> { type_version = cls.assign_version_tag(); } if type_version != 0 { - let func_version = func.get_version_for_current_state(); - if cls.cache_getitem_for_specialization( - func.to_owned(), - type_version, - func_version, - ) { - unsafe { - self.code - .instructions - .write_cache_u32(cache_base + 1, type_version); - } + if cls.cache_getitem_for_specialization(func.to_owned(), type_version) { Some(Instruction::BinaryOpSubscrGetitem) } else { None @@ -8264,9 +8245,21 @@ impl ExecutingFrame<'_> { &self, vm: &VirtualMachine, func: &Py, + ) -> bool { + self.specialization_has_datastack_space_for_func_with_extra(vm, func, 0) + } + + #[inline] + fn specialization_has_datastack_space_for_func_with_extra( + &self, + vm: &VirtualMachine, + func: &Py, + extra_bytes: usize, ) -> bool { match func.datastack_frame_size_bytes() { - Some(frame_size) => vm.datastack_has_space(frame_size), + Some(frame_size) => frame_size + .checked_add(extra_bytes) + .is_some_and(|size| vm.datastack_has_space(size)), None => true, } } From 17f1fa420d8a15932081df3186bbba52eeabfd8e Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Sun, 8 Mar 2026 21:13:50 +0900 Subject: [PATCH 4/6] vm: align BINARY_OP, STORE_SUBSCR, UNPACK_SEQUENCE specialization guards --- crates/vm/src/frame.rs | 459 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 424 insertions(+), 35 deletions(-) diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index 2d74e98e95e..18a579c1d36 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -1674,17 +1674,25 @@ impl ExecutingFrame<'_> { Instruction::BinaryOpInplaceAddUnicode => { let b = self.top_value(); let a = self.nth_value(1); - if let (Some(a_str), Some(b_str)) = ( + let instr_idx = self.lasti() as usize - 1; + let cache_base = instr_idx + 1; + let target_local = self.binary_op_inplace_unicode_target_local(cache_base, a); + if let (Some(a_str), Some(b_str), Some(target_local)) = ( a.downcast_ref_if_exact::(vm), b.downcast_ref_if_exact::(vm), + target_local, ) { let result = a_str.as_wtf8().py_add(b_str.as_wtf8()); self.pop_value(); self.pop_value(); - self.push_value(result.to_pyobject(vm)); + self.localsplus.fastlocals_mut()[target_local] = Some(result.to_pyobject(vm)); + self.jump_relative_forward( + 1, + Instruction::BinaryOpInplaceAddUnicode.cache_entries() as u32, + ); Ok(None) } else { - self.execute_bin_op(vm, bytecode::BinaryOperator::InplaceAdd) + self.execute_bin_op(vm, self.binary_op_from_arg(arg)) } } Instruction::BinarySlice => { @@ -3099,8 +3107,9 @@ impl ExecutingFrame<'_> { self.execute_unpack_ex(vm, args.before, args.after) } Instruction::UnpackSequence { count: size } => { - self.adaptive(|s, ii, cb| s.specialize_unpack_sequence(vm, ii, cb)); - self.unpack_sequence(size.get(arg), vm) + let expected = size.get(arg); + self.adaptive(|s, ii, cb| s.specialize_unpack_sequence(vm, expected, ii, cb)); + self.unpack_sequence(expected, vm) } Instruction::WithExceptStart => { // Stack: [..., __exit__, lasti, prev_exc, exc] @@ -3709,15 +3718,13 @@ impl ExecutingFrame<'_> { let value = self.pop_value(); if let Some(list) = obj.downcast_ref_if_exact::(vm) && let Some(int_idx) = idx.downcast_ref_if_exact::(vm) - && let Ok(i) = int_idx.try_to_primitive::(vm) + && let Some(i) = Self::specialization_nonnegative_compact_index(int_idx, vm) { let mut vec = list.borrow_vec_mut(); - if let Some(pos) = vec.wrap_index(i) { - vec[pos] = value; + if i < vec.len() { + vec[i] = value; return Ok(None); } - drop(vec); - return Err(vm.new_index_error("list assignment index out of range")); } obj.set_item(&*idx, value, vm)?; Ok(None) @@ -3788,9 +3795,143 @@ impl ExecutingFrame<'_> { self.execute_bin_op(vm, bytecode::BinaryOperator::Subscr) } Instruction::BinaryOpExtend => { - let op = bytecode::BinaryOperator::try_from(u32::from(arg)) - .unwrap_or(bytecode::BinaryOperator::Subscr); - self.execute_bin_op(vm, op) + let op = self.binary_op_from_arg(arg); + let b = self.top_value(); + let a = self.nth_value(1); + + let fast = match op { + bytecode::BinaryOperator::And | bytecode::BinaryOperator::InplaceAnd => { + if let (Some(a_int), Some(b_int)) = ( + a.downcast_ref_if_exact::(vm), + b.downcast_ref_if_exact::(vm), + ) && let (Some(a_val), Some(b_val)) = ( + Self::specialization_compact_int_value(a_int, vm), + Self::specialization_compact_int_value(b_int, vm), + ) { + Some(vm.ctx.new_int(a_val & b_val).into()) + } else { + None + } + } + bytecode::BinaryOperator::Or | bytecode::BinaryOperator::InplaceOr => { + if let (Some(a_int), Some(b_int)) = ( + a.downcast_ref_if_exact::(vm), + b.downcast_ref_if_exact::(vm), + ) && let (Some(a_val), Some(b_val)) = ( + Self::specialization_compact_int_value(a_int, vm), + Self::specialization_compact_int_value(b_int, vm), + ) { + Some(vm.ctx.new_int(a_val | b_val).into()) + } else { + None + } + } + bytecode::BinaryOperator::Xor | bytecode::BinaryOperator::InplaceXor => { + if let (Some(a_int), Some(b_int)) = ( + a.downcast_ref_if_exact::(vm), + b.downcast_ref_if_exact::(vm), + ) && let (Some(a_val), Some(b_val)) = ( + Self::specialization_compact_int_value(a_int, vm), + Self::specialization_compact_int_value(b_int, vm), + ) { + Some(vm.ctx.new_int(a_val ^ b_val).into()) + } else { + None + } + } + bytecode::BinaryOperator::Add => { + if let (Some(a_float), Some(b_int)) = ( + a.downcast_ref_if_exact::(vm), + b.downcast_ref_if_exact::(vm), + ) && let Some(b_val) = Self::specialization_compact_int_value(b_int, vm) + && !a_float.to_f64().is_nan() + { + Some(vm.ctx.new_float(a_float.to_f64() + b_val as f64).into()) + } else if let (Some(a_int), Some(b_float)) = ( + a.downcast_ref_if_exact::(vm), + b.downcast_ref_if_exact::(vm), + ) && let Some(a_val) = + Self::specialization_compact_int_value(a_int, vm) + && !b_float.to_f64().is_nan() + { + Some(vm.ctx.new_float(a_val as f64 + b_float.to_f64()).into()) + } else { + None + } + } + bytecode::BinaryOperator::Subtract => { + if let (Some(a_float), Some(b_int)) = ( + a.downcast_ref_if_exact::(vm), + b.downcast_ref_if_exact::(vm), + ) && let Some(b_val) = Self::specialization_compact_int_value(b_int, vm) + && !a_float.to_f64().is_nan() + { + Some(vm.ctx.new_float(a_float.to_f64() - b_val as f64).into()) + } else if let (Some(a_int), Some(b_float)) = ( + a.downcast_ref_if_exact::(vm), + b.downcast_ref_if_exact::(vm), + ) && let Some(a_val) = + Self::specialization_compact_int_value(a_int, vm) + && !b_float.to_f64().is_nan() + { + Some(vm.ctx.new_float(a_val as f64 - b_float.to_f64()).into()) + } else { + None + } + } + bytecode::BinaryOperator::Multiply => { + if let (Some(a_float), Some(b_int)) = ( + a.downcast_ref_if_exact::(vm), + b.downcast_ref_if_exact::(vm), + ) && let Some(b_val) = Self::specialization_compact_int_value(b_int, vm) + && !a_float.to_f64().is_nan() + { + Some(vm.ctx.new_float(a_float.to_f64() * b_val as f64).into()) + } else if let (Some(a_int), Some(b_float)) = ( + a.downcast_ref_if_exact::(vm), + b.downcast_ref_if_exact::(vm), + ) && let Some(a_val) = + Self::specialization_compact_int_value(a_int, vm) + && !b_float.to_f64().is_nan() + { + Some(vm.ctx.new_float(a_val as f64 * b_float.to_f64()).into()) + } else { + None + } + } + bytecode::BinaryOperator::TrueDivide => { + if let (Some(a_float), Some(b_int)) = ( + a.downcast_ref_if_exact::(vm), + b.downcast_ref_if_exact::(vm), + ) && let Some(b_val) = Self::specialization_compact_int_value(b_int, vm) + && b_val != 0 + && !a_float.to_f64().is_nan() + { + Some(vm.ctx.new_float(a_float.to_f64() / b_val as f64).into()) + } else if let (Some(a_int), Some(b_float)) = ( + a.downcast_ref_if_exact::(vm), + b.downcast_ref_if_exact::(vm), + ) && let Some(a_val) = + Self::specialization_compact_int_value(a_int, vm) + && b_float.to_f64() != 0.0 + && !b_float.to_f64().is_nan() + { + Some(vm.ctx.new_float(a_val as f64 / b_float.to_f64()).into()) + } else { + None + } + } + _ => None, + }; + + if let Some(result) = fast { + self.pop_value(); + self.pop_value(); + self.push_value(result); + Ok(None) + } else { + self.execute_bin_op(vm, op) + } } Instruction::BinaryOpSubscrListInt => { let b = self.top_value(); @@ -3798,7 +3939,7 @@ impl ExecutingFrame<'_> { if let (Some(list), Some(idx)) = ( a.downcast_ref_if_exact::(vm), b.downcast_ref_if_exact::(vm), - ) && let Ok(i) = idx.try_to_primitive::(vm) + ) && let Some(i) = Self::specialization_nonnegative_compact_index(idx, vm) { let vec = list.borrow_vec(); if i < vec.len() { @@ -3818,7 +3959,7 @@ impl ExecutingFrame<'_> { if let (Some(tuple), Some(idx)) = ( a.downcast_ref_if_exact::(vm), b.downcast_ref_if_exact::(vm), - ) && let Ok(i) = idx.try_to_primitive::(vm) + ) && let Some(i) = Self::specialization_nonnegative_compact_index(idx, vm) { let elements = tuple.as_slice(); if i < elements.len() { @@ -3860,7 +4001,7 @@ impl ExecutingFrame<'_> { if let (Some(a_str), Some(b_int)) = ( a.downcast_ref_if_exact::(vm), b.downcast_ref_if_exact::(vm), - ) && let Ok(i) = b_int.try_to_primitive::(vm) + ) && let Some(i) = Self::specialization_nonnegative_compact_index(b_int, vm) && let Ok(ch) = a_str.getitem_by_index(vm, i as isize) && ch.is_ascii() { @@ -4900,9 +5041,12 @@ impl ExecutingFrame<'_> { if let (Some(a_int), Some(b_int)) = ( a.downcast_ref_if_exact::(vm), b.downcast_ref_if_exact::(vm), + ) && let (Some(a_val), Some(b_val)) = ( + Self::specialization_compact_int_value(a_int, vm), + Self::specialization_compact_int_value(b_int, vm), ) { let op = self.compare_op_from_arg(arg); - let result = op.eval_ord(a_int.as_bigint().cmp(b_int.as_bigint())); + let result = op.eval_ord(a_val.cmp(&b_val)); self.pop_value(); self.pop_value(); self.push_value(vm.ctx.new_bool(result).into()); @@ -4945,6 +5089,11 @@ impl ExecutingFrame<'_> { b.downcast_ref_if_exact::(vm), ) { let op = self.compare_op_from_arg(arg); + if op != PyComparisonOp::Eq && op != PyComparisonOp::Ne { + let op = bytecode::ComparisonOperator::try_from(u32::from(arg)) + .unwrap_or(bytecode::ComparisonOperator::Equal); + return self.execute_compare(vm, op); + } let result = op.eval_ord(a_str.as_wtf8().cmp(b_str.as_wtf8())); self.pop_value(); self.pop_value(); @@ -7369,7 +7518,36 @@ impl ExecutingFrame<'_> { } else if a.downcast_ref_if_exact::(vm).is_some() && b.downcast_ref_if_exact::(vm).is_some() { - Some(Instruction::BinaryOpAddUnicode) + if self + .binary_op_inplace_unicode_target_local(cache_base, a) + .is_some() + { + Some(Instruction::BinaryOpInplaceAddUnicode) + } else { + Some(Instruction::BinaryOpAddUnicode) + } + } else if let (Some(a_float), Some(b_int)) = ( + a.downcast_ref_if_exact::(vm), + b.downcast_ref_if_exact::(vm), + ) { + if !a_float.to_f64().is_nan() + && Self::specialization_compact_int_value(b_int, vm).is_some() + { + Some(Instruction::BinaryOpExtend) + } else { + None + } + } else if let (Some(a_int), Some(b_float)) = ( + a.downcast_ref_if_exact::(vm), + b.downcast_ref_if_exact::(vm), + ) { + if !b_float.to_f64().is_nan() + && Self::specialization_compact_int_value(a_int, vm).is_some() + { + Some(Instruction::BinaryOpExtend) + } else { + None + } } else { None } @@ -7383,6 +7561,28 @@ impl ExecutingFrame<'_> { && b.downcast_ref_if_exact::(vm).is_some() { Some(Instruction::BinaryOpSubtractFloat) + } else if let (Some(a_float), Some(b_int)) = ( + a.downcast_ref_if_exact::(vm), + b.downcast_ref_if_exact::(vm), + ) { + if !a_float.to_f64().is_nan() + && Self::specialization_compact_int_value(b_int, vm).is_some() + { + Some(Instruction::BinaryOpExtend) + } else { + None + } + } else if let (Some(a_int), Some(b_float)) = ( + a.downcast_ref_if_exact::(vm), + b.downcast_ref_if_exact::(vm), + ) { + if !b_float.to_f64().is_nan() + && Self::specialization_compact_int_value(a_int, vm).is_some() + { + Some(Instruction::BinaryOpExtend) + } else { + None + } } else { None } @@ -7396,14 +7596,64 @@ impl ExecutingFrame<'_> { && b.downcast_ref_if_exact::(vm).is_some() { Some(Instruction::BinaryOpMultiplyFloat) + } else if let (Some(a_float), Some(b_int)) = ( + a.downcast_ref_if_exact::(vm), + b.downcast_ref_if_exact::(vm), + ) { + if !a_float.to_f64().is_nan() + && Self::specialization_compact_int_value(b_int, vm).is_some() + { + Some(Instruction::BinaryOpExtend) + } else { + None + } + } else if let (Some(a_int), Some(b_float)) = ( + a.downcast_ref_if_exact::(vm), + b.downcast_ref_if_exact::(vm), + ) { + if !b_float.to_f64().is_nan() + && Self::specialization_compact_int_value(a_int, vm).is_some() + { + Some(Instruction::BinaryOpExtend) + } else { + None + } + } else { + None + } + } + bytecode::BinaryOperator::TrueDivide => { + if let (Some(a_float), Some(b_int)) = ( + a.downcast_ref_if_exact::(vm), + b.downcast_ref_if_exact::(vm), + ) { + if !a_float.to_f64().is_nan() + && Self::specialization_compact_int_value(b_int, vm).is_some_and(|x| x != 0) + { + Some(Instruction::BinaryOpExtend) + } else { + None + } + } else if let (Some(a_int), Some(b_float)) = ( + a.downcast_ref_if_exact::(vm), + b.downcast_ref_if_exact::(vm), + ) { + if !b_float.to_f64().is_nan() + && b_float.to_f64() != 0.0 + && Self::specialization_compact_int_value(a_int, vm).is_some() + { + Some(Instruction::BinaryOpExtend) + } else { + None + } } else { None } } bytecode::BinaryOperator::Subscr => { - let b_is_nonnegative_int = b - .downcast_ref_if_exact::(vm) - .is_some_and(|i| i.try_to_primitive::(vm).is_ok()); + let b_is_nonnegative_int = b.downcast_ref_if_exact::(vm).is_some_and(|i| { + Self::specialization_nonnegative_compact_index(i, vm).is_some() + }); if a.downcast_ref_if_exact::(vm).is_some() && b_is_nonnegative_int { Some(Instruction::BinaryOpSubscrListInt) } else if a.downcast_ref_if_exact::(vm).is_some() && b_is_nonnegative_int { @@ -7446,7 +7696,69 @@ impl ExecutingFrame<'_> { if a.downcast_ref_if_exact::(vm).is_some() && b.downcast_ref_if_exact::(vm).is_some() { - Some(Instruction::BinaryOpInplaceAddUnicode) + if self + .binary_op_inplace_unicode_target_local(cache_base, a) + .is_some() + { + Some(Instruction::BinaryOpInplaceAddUnicode) + } else { + Some(Instruction::BinaryOpAddUnicode) + } + } else if a.downcast_ref_if_exact::(vm).is_some() + && b.downcast_ref_if_exact::(vm).is_some() + { + Some(Instruction::BinaryOpAddInt) + } else if a.downcast_ref_if_exact::(vm).is_some() + && b.downcast_ref_if_exact::(vm).is_some() + { + Some(Instruction::BinaryOpAddFloat) + } else { + None + } + } + bytecode::BinaryOperator::InplaceSubtract => { + if a.downcast_ref_if_exact::(vm).is_some() + && b.downcast_ref_if_exact::(vm).is_some() + { + Some(Instruction::BinaryOpSubtractInt) + } else if a.downcast_ref_if_exact::(vm).is_some() + && b.downcast_ref_if_exact::(vm).is_some() + { + Some(Instruction::BinaryOpSubtractFloat) + } else { + None + } + } + bytecode::BinaryOperator::InplaceMultiply => { + if a.downcast_ref_if_exact::(vm).is_some() + && b.downcast_ref_if_exact::(vm).is_some() + { + Some(Instruction::BinaryOpMultiplyInt) + } else if a.downcast_ref_if_exact::(vm).is_some() + && b.downcast_ref_if_exact::(vm).is_some() + { + Some(Instruction::BinaryOpMultiplyFloat) + } else { + None + } + } + bytecode::BinaryOperator::And + | bytecode::BinaryOperator::Or + | bytecode::BinaryOperator::Xor + | bytecode::BinaryOperator::InplaceAnd + | bytecode::BinaryOperator::InplaceOr + | bytecode::BinaryOperator::InplaceXor => { + if let (Some(a_int), Some(b_int)) = ( + a.downcast_ref_if_exact::(vm), + b.downcast_ref_if_exact::(vm), + ) { + if Self::specialization_compact_int_value(a_int, vm).is_some() + && Self::specialization_compact_int_value(b_int, vm).is_some() + { + Some(Instruction::BinaryOpExtend) + } else { + None + } } else { None } @@ -7457,6 +7769,27 @@ impl ExecutingFrame<'_> { self.commit_specialization(instr_idx, cache_base, new_op); } + #[inline] + fn binary_op_inplace_unicode_target_local( + &self, + cache_base: usize, + left: &PyObject, + ) -> Option { + let next_idx = cache_base + Instruction::BinaryOp { op: Arg::marker() }.cache_entries(); + let unit = self.code.instructions.get(next_idx)?; + let next_op = unit.op.to_base().unwrap_or(unit.op); + if !matches!(next_op, Instruction::StoreFast { .. }) { + return None; + } + let local_idx = usize::from(u8::from(unit.arg)); + self.localsplus + .fastlocals() + .get(local_idx) + .and_then(|slot| slot.as_ref()) + .filter(|local| local.is(left)) + .map(|_| local_idx) + } + /// Adaptive counter: trigger specialization at zero, otherwise advance countdown. #[inline] fn adaptive(&mut self, specialize: impl FnOnce(&mut Self, usize, usize)) { @@ -8107,7 +8440,7 @@ impl ExecutingFrame<'_> { fn specialize_compare_op( &mut self, vm: &VirtualMachine, - _op: bytecode::ComparisonOperator, + op: bytecode::ComparisonOperator, instr_idx: usize, cache_base: usize, ) { @@ -8120,16 +8453,25 @@ impl ExecutingFrame<'_> { let b = self.top_value(); let a = self.nth_value(1); - let new_op = if a.downcast_ref_if_exact::(vm).is_some() - && b.downcast_ref_if_exact::(vm).is_some() - { - Some(Instruction::CompareOpInt) + let new_op = if let (Some(a_int), Some(b_int)) = ( + a.downcast_ref_if_exact::(vm), + b.downcast_ref_if_exact::(vm), + ) { + if Self::specialization_compact_int_value(a_int, vm).is_some() + && Self::specialization_compact_int_value(b_int, vm).is_some() + { + Some(Instruction::CompareOpInt) + } else { + None + } } else if a.downcast_ref_if_exact::(vm).is_some() && b.downcast_ref_if_exact::(vm).is_some() { Some(Instruction::CompareOpFloat) } else if a.downcast_ref_if_exact::(vm).is_some() && b.downcast_ref_if_exact::(vm).is_some() + && (op == bytecode::ComparisonOperator::Equal + || op == bytecode::ComparisonOperator::NotEqual) { Some(Instruction::CompareOpStr) } else { @@ -8147,6 +8489,12 @@ impl ExecutingFrame<'_> { .into() } + /// Recover the BinaryOperator from the instruction arg byte. + /// `replace_op` preserves the arg byte, so the original op remains accessible. + fn binary_op_from_arg(&self, arg: bytecode::OpArg) -> bytecode::BinaryOperator { + bytecode::BinaryOperator::try_from(u32::from(arg)).unwrap_or(bytecode::BinaryOperator::Add) + } + fn specialize_to_bool(&mut self, vm: &VirtualMachine, instr_idx: usize, cache_base: usize) { if !matches!( self.code.instructions.read_op(instr_idx), @@ -8167,7 +8515,8 @@ impl ExecutingFrame<'_> { Some(Instruction::ToBoolList) } else if cls.is(PyStr::class(&vm.ctx)) { Some(Instruction::ToBoolStr) - } else if cls.slots.as_number.boolean.load().is_none() + } else if cls.slots.flags.has_feature(PyTypeFlags::HEAPTYPE) + && cls.slots.as_number.boolean.load().is_none() && cls.slots.as_mapping.length.load().is_none() && cls.slots.as_sequence.length.load().is_none() { @@ -8264,6 +8613,31 @@ impl ExecutingFrame<'_> { } } + #[inline] + fn specialization_compact_int_value(i: &PyInt, vm: &VirtualMachine) -> Option { + // CPython's _PyLong_IsCompact() means a one-digit PyLong (base 2^30), + // i.e. abs(value) <= 2^30 - 1. + const CPYTHON_COMPACT_LONG_ABS_MAX: i64 = (1i64 << 30) - 1; + let v = i.try_to_primitive::(vm).ok()?; + if (-CPYTHON_COMPACT_LONG_ABS_MAX..=CPYTHON_COMPACT_LONG_ABS_MAX).contains(&v) { + Some(v as isize) + } else { + None + } + } + + #[inline] + fn specialization_nonnegative_compact_index(i: &PyInt, vm: &VirtualMachine) -> Option { + // CPython's _PyLong_IsNonNegativeCompact() uses a single base-2^30 digit. + const CPYTHON_COMPACT_LONG_MAX: u64 = (1u64 << 30) - 1; + let v = i.try_to_primitive::(vm).ok()?; + if v <= CPYTHON_COMPACT_LONG_MAX { + Some(v as usize) + } else { + None + } + } + #[inline] fn specialization_call_recursion_guard(&self, vm: &VirtualMachine) -> bool { vm.current_recursion_depth().saturating_add(1) >= vm.recursion_limit.get() @@ -8390,10 +8764,18 @@ impl ExecutingFrame<'_> { let obj = self.nth_value(1); let idx = self.top_value(); - let new_op = if obj.downcast_ref_if_exact::(vm).is_some() - && idx.downcast_ref_if_exact::(vm).is_some() - { - Some(Instruction::StoreSubscrListInt) + let new_op = if let (Some(list), Some(int_idx)) = ( + obj.downcast_ref_if_exact::(vm), + idx.downcast_ref_if_exact::(vm), + ) { + let list_len = list.borrow_vec().len(); + if Self::specialization_nonnegative_compact_index(int_idx, vm) + .is_some_and(|i| i < list_len) + { + Some(Instruction::StoreSubscrListInt) + } else { + None + } } else if obj.downcast_ref_if_exact::(vm).is_some() { Some(Instruction::StoreSubscrDict) } else { @@ -8427,6 +8809,7 @@ impl ExecutingFrame<'_> { fn specialize_unpack_sequence( &mut self, vm: &VirtualMachine, + expected_count: u32, instr_idx: usize, cache_base: usize, ) { @@ -8438,13 +8821,19 @@ impl ExecutingFrame<'_> { } let obj = self.top_value(); let new_op = if let Some(tuple) = obj.downcast_ref_if_exact::(vm) { - if tuple.len() == 2 { + if tuple.len() != expected_count as usize { + None + } else if expected_count == 2 { Some(Instruction::UnpackSequenceTwoTuple) } else { Some(Instruction::UnpackSequenceTuple) } - } else if obj.downcast_ref_if_exact::(vm).is_some() { - Some(Instruction::UnpackSequenceList) + } else if let Some(list) = obj.downcast_ref_if_exact::(vm) { + if list.borrow_vec().len() == expected_count as usize { + Some(Instruction::UnpackSequenceList) + } else { + None + } } else { None }; From f6872fa405962cf24a37a1a4025a8cd98d70e3f5 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Sun, 8 Mar 2026 21:13:55 +0900 Subject: [PATCH 5/6] vm: finalize unicode/subscr specialization parity and regressions --- crates/vm/src/builtins/list.rs | 18 ++++++++++- crates/vm/src/builtins/str.rs | 29 ++++++++++++++--- crates/vm/src/frame.rs | 31 +++++++++++++----- crates/vm/src/vm/context.rs | 8 +++++ ...alization_binary_op_inplace_add_unicode.py | 32 +++++++++++++++++++ ...on_binary_op_subscr_str_int_ascii_cache.py | 13 ++++++++ ...nary_op_subscr_str_int_latin1_singleton.py | 13 ++++++++ 7 files changed, 131 insertions(+), 13 deletions(-) create mode 100644 extra_tests/snippets/specialization_binary_op_inplace_add_unicode.py create mode 100644 extra_tests/snippets/specialization_binary_op_subscr_str_int_ascii_cache.py create mode 100644 extra_tests/snippets/specialization_binary_op_subscr_str_int_latin1_singleton.py diff --git a/crates/vm/src/builtins/list.rs b/crates/vm/src/builtins/list.rs index c13dea57169..34c40bba209 100644 --- a/crates/vm/src/builtins/list.rs +++ b/crates/vm/src/builtins/list.rs @@ -286,7 +286,16 @@ impl PyList { fn _setitem(&self, needle: &PyObject, value: PyObjectRef, vm: &VirtualMachine) -> PyResult<()> { match SequenceIndex::try_from_borrowed_object(vm, needle, "list")? { - SequenceIndex::Int(index) => self.borrow_vec_mut().setitem_by_index(vm, index, value), + SequenceIndex::Int(index) => self + .borrow_vec_mut() + .setitem_by_index(vm, index, value) + .map_err(|e| { + if e.class().is(vm.ctx.exceptions.index_error) { + vm.new_index_error("list assignment index out of range".to_owned()) + } else { + e + } + }), SequenceIndex::Slice(slice) => { let sec = extract_cloned(&value, Ok, vm)?; self.borrow_vec_mut().setitem_by_slice(vm, slice, &sec) @@ -509,6 +518,13 @@ impl AsSequence for PyList { } else { zelf.borrow_vec_mut().delitem_by_index(vm, i) } + .map_err(|e| { + if e.class().is(vm.ctx.exceptions.index_error) { + vm.new_index_error("list assignment index out of range".to_owned()) + } else { + e + } + }) }), contains: atomic_func!(|seq, target, vm| { let zelf = PyList::sequence_downcast(seq); diff --git a/crates/vm/src/builtins/str.rs b/crates/vm/src/builtins/str.rs index 38102c18865..6322c5bee7f 100644 --- a/crates/vm/src/builtins/str.rs +++ b/crates/vm/src/builtins/str.rs @@ -1500,14 +1500,25 @@ impl PyRef { } pub fn concat_in_place(&mut self, other: &Wtf8, vm: &VirtualMachine) { - // TODO: call [A]Rc::get_mut on the str to try to mutate the data in place if other.is_empty() { return; } let mut s = Wtf8Buf::with_capacity(self.byte_len() + other.len()); s.push_wtf8(self.as_ref()); s.push_wtf8(other); - *self = PyStr::from(s).into_ref(&vm.ctx); + if self.as_object().strong_count() == 1 { + // SAFETY: strong_count()==1 guarantees unique ownership of this PyStr. + // Mutating payload in place preserves semantics while avoiding PyObject reallocation. + unsafe { + let payload = self.payload() as *const PyStr as *mut PyStr; + (*payload).data = PyStr::from(s).data; + (*payload) + .hash + .store(hash::SENTINEL, atomic::Ordering::Relaxed); + } + } else { + *self = PyStr::from(s).into_ref(&vm.ctx); + } } pub fn try_into_utf8(self, vm: &VirtualMachine) -> PyResult> { @@ -1678,13 +1689,23 @@ impl ToPyObject for Wtf8Buf { impl ToPyObject for char { fn to_pyobject(self, vm: &VirtualMachine) -> PyObjectRef { - vm.ctx.new_str(self).into() + let cp = self as u32; + if cp <= u8::MAX as u32 { + vm.ctx.latin1_char_cache[cp as usize].clone().into() + } else { + vm.ctx.new_str(self).into() + } } } impl ToPyObject for CodePoint { fn to_pyobject(self, vm: &VirtualMachine) -> PyObjectRef { - vm.ctx.new_str(self).into() + let cp = self.to_u32(); + if cp <= u8::MAX as u32 { + vm.ctx.latin1_char_cache[cp as usize].clone().into() + } else { + vm.ctx.new_str(self).into() + } } } diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index 18a579c1d36..74935f0fd49 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -1669,23 +1669,37 @@ impl ExecutingFrame<'_> { self.adaptive(|s, ii, cb| s.specialize_binary_op(vm, op_val, ii, cb)); self.execute_bin_op(vm, op_val) } - // TODO: In CPython, this does in-place unicode concatenation when - // refcount is 1. Falls back to regular iadd for now. + // Super-instruction for BINARY_OP_ADD_UNICODE + STORE_FAST targeting + // the left local, mirroring CPython's BINARY_OP_INPLACE_ADD_UNICODE shape. Instruction::BinaryOpInplaceAddUnicode => { let b = self.top_value(); let a = self.nth_value(1); let instr_idx = self.lasti() as usize - 1; let cache_base = instr_idx + 1; let target_local = self.binary_op_inplace_unicode_target_local(cache_base, a); - if let (Some(a_str), Some(b_str), Some(target_local)) = ( + if let (Some(_a_str), Some(_b_str), Some(target_local)) = ( a.downcast_ref_if_exact::(vm), b.downcast_ref_if_exact::(vm), target_local, ) { - let result = a_str.as_wtf8().py_add(b_str.as_wtf8()); - self.pop_value(); - self.pop_value(); - self.localsplus.fastlocals_mut()[target_local] = Some(result.to_pyobject(vm)); + let right = self.pop_value(); + let left = self.pop_value(); + + let local_obj = self.localsplus.fastlocals_mut()[target_local] + .take() + .expect("BINARY_OP_INPLACE_ADD_UNICODE target local missing"); + debug_assert!(local_obj.is(&left)); + let mut local_str = local_obj + .downcast_exact::(vm) + .expect("BINARY_OP_INPLACE_ADD_UNICODE target local not exact str") + .into_pyref(); + drop(left); + let right_str = right + .downcast_ref_if_exact::(vm) + .expect("BINARY_OP_INPLACE_ADD_UNICODE right operand not exact str"); + local_str.concat_in_place(right_str.as_wtf8(), vm); + + self.localsplus.fastlocals_mut()[target_local] = Some(local_str.into()); self.jump_relative_forward( 1, Instruction::BinaryOpInplaceAddUnicode.cache_entries() as u32, @@ -4005,9 +4019,10 @@ impl ExecutingFrame<'_> { && let Ok(ch) = a_str.getitem_by_index(vm, i as isize) && ch.is_ascii() { + let ascii_idx = ch.to_u32() as usize; self.pop_value(); self.pop_value(); - self.push_value(PyStr::from(ch).into_pyobject(vm)); + self.push_value(vm.ctx.ascii_char_cache[ascii_idx].clone().into()); return Ok(None); } self.execute_bin_op(vm, bytecode::BinaryOperator::Subscr) diff --git a/crates/vm/src/vm/context.rs b/crates/vm/src/vm/context.rs index d864548ff08..dfd8829549f 100644 --- a/crates/vm/src/vm/context.rs +++ b/crates/vm/src/vm/context.rs @@ -47,6 +47,8 @@ pub struct Context { pub types: TypeZoo, pub exceptions: exceptions::ExceptionZoo, pub int_cache_pool: Vec, + pub(crate) latin1_char_cache: Vec>, + pub(crate) ascii_char_cache: Vec>, // there should only be exact objects of str in here, no non-str objects and no subclasses pub(crate) string_pool: StringPool, pub(crate) slot_new_wrapper: PyMethodDef, @@ -324,6 +326,10 @@ impl Context { ) }) .collect(); + let latin1_char_cache: Vec> = (0u8..=255) + .map(|b| create_object(PyStr::from(char::from(b)), types.str_type)) + .collect(); + let ascii_char_cache = latin1_char_cache[..128].to_vec(); let true_value = create_object(PyBool(PyInt::from(1)), types.bool_type); let false_value = create_object(PyBool(PyInt::from(0)), types.bool_type); @@ -371,6 +377,8 @@ impl Context { types, exceptions, int_cache_pool, + latin1_char_cache, + ascii_char_cache, string_pool, slot_new_wrapper, names, diff --git a/extra_tests/snippets/specialization_binary_op_inplace_add_unicode.py b/extra_tests/snippets/specialization_binary_op_inplace_add_unicode.py new file mode 100644 index 00000000000..3e7e3e09f69 --- /dev/null +++ b/extra_tests/snippets/specialization_binary_op_inplace_add_unicode.py @@ -0,0 +1,32 @@ +class S(str): + def __add__(self, other): + return "ADD" + + def __iadd__(self, other): + return "IADD" + + +def add_path_fallback_uses_add(): + x = "a" + y = "b" + for i in range(1200): + if i == 600: + x = S("s") + y = "t" + x = x + y + return x + + +def iadd_path_fallback_uses_iadd(): + x = "a" + y = "b" + for i in range(1200): + if i == 600: + x = S("s") + y = "t" + x += y + return x + + +assert add_path_fallback_uses_add().startswith("ADD") +assert iadd_path_fallback_uses_iadd().startswith("IADD") diff --git a/extra_tests/snippets/specialization_binary_op_subscr_str_int_ascii_cache.py b/extra_tests/snippets/specialization_binary_op_subscr_str_int_ascii_cache.py new file mode 100644 index 00000000000..b979c4285f9 --- /dev/null +++ b/extra_tests/snippets/specialization_binary_op_subscr_str_int_ascii_cache.py @@ -0,0 +1,13 @@ +def check_ascii_subscr_singleton_after_warmup(): + s = "abc" + first = None + for i in range(4000): + c = s[0] + if i >= 3500: + if first is None: + first = c + else: + assert c is first + + +check_ascii_subscr_singleton_after_warmup() diff --git a/extra_tests/snippets/specialization_binary_op_subscr_str_int_latin1_singleton.py b/extra_tests/snippets/specialization_binary_op_subscr_str_int_latin1_singleton.py new file mode 100644 index 00000000000..9220824b20b --- /dev/null +++ b/extra_tests/snippets/specialization_binary_op_subscr_str_int_latin1_singleton.py @@ -0,0 +1,13 @@ +def check_latin1_subscr_singleton_after_warmup(): + for s in ("abc", "éx"): + first = None + for i in range(5000): + c = s[0] + if i >= 4500: + if first is None: + first = c + else: + assert c is first + + +check_latin1_subscr_singleton_after_warmup() From e65cbc0d58146156651440658cf5b1205ac361c3 Mon Sep 17 00:00:00 2001 From: "Jeong, YunWon" Date: Sun, 8 Mar 2026 21:13:58 +0900 Subject: [PATCH 6/6] vm: finalize specialization GC safety, tests, and cleanup --- crates/vm/src/builtins/function.rs | 6 +- crates/vm/src/builtins/type.rs | 39 +++++++---- crates/vm/src/builtins/union.rs | 6 +- crates/vm/src/coroutine.rs | 2 +- crates/vm/src/frame.rs | 15 ++-- crates/vm/src/function/method.rs | 2 +- crates/vm/src/object/core.rs | 7 +- crates/vm/src/stdlib/builtins.rs | 2 +- crates/vm/src/stdlib/posix.rs | 2 +- crates/vm/src/stdlib/sys/monitoring.rs | 2 +- crates/vm/src/vm/mod.rs | 3 +- ...alization_binary_op_inplace_add_unicode.py | 32 --------- ...on_binary_op_subscr_str_int_ascii_cache.py | 13 ---- ...nary_op_subscr_str_int_latin1_singleton.py | 13 ---- extra_tests/snippets/vm_specialization.py | 68 +++++++++++++++++++ 15 files changed, 116 insertions(+), 96 deletions(-) delete mode 100644 extra_tests/snippets/specialization_binary_op_inplace_add_unicode.py delete mode 100644 extra_tests/snippets/specialization_binary_op_subscr_str_int_ascii_cache.py delete mode 100644 extra_tests/snippets/specialization_binary_op_subscr_str_int_latin1_singleton.py create mode 100644 extra_tests/snippets/vm_specialization.py diff --git a/crates/vm/src/builtins/function.rs b/crates/vm/src/builtins/function.rs index d18fdd19701..f74057f4816 100644 --- a/crates/vm/src/builtins/function.rs +++ b/crates/vm/src/builtins/function.rs @@ -640,7 +640,7 @@ impl Py { new_v } - /// CPython function_kind(SIMPLE_FUNCTION) equivalent for CALL specialization. + /// function_kind(SIMPLE_FUNCTION) equivalent for CALL specialization. /// Returns true if: CO_OPTIMIZED, no VARARGS, no VARKEYWORDS, no kwonly args. pub(crate) fn is_simple_for_call_specialization(&self) -> bool { let code: &Py = &self.code; @@ -705,8 +705,8 @@ impl Py { ); debug_assert_eq!(code.kwonlyarg_count, 0); - // Generator/coroutine code objects are SIMPLE_FUNCTION in CPython's - // call specialization classification, but their call path must still + // Generator/coroutine code objects are SIMPLE_FUNCTION in call + // specialization classification, but their call path must still // go through invoke() to produce generator/coroutine objects. if code .flags diff --git a/crates/vm/src/builtins/type.rs b/crates/vm/src/builtins/type.rs index a894ab84032..d055bf6fabc 100644 --- a/crates/vm/src/builtins/type.rs +++ b/crates/vm/src/builtins/type.rs @@ -235,6 +235,7 @@ unsafe impl crate::object::Traverse for PyType { .count(); if let Some(ext) = self.heaptype_ext.as_ref() { ext.specialization_init.read().traverse(tracer_fn); + ext.specialization_getitem.read().traverse(tracer_fn); } } @@ -263,11 +264,19 @@ unsafe impl crate::object::Traverse for PyType { out.push(val); } } - if let Some(ext) = self.heaptype_ext.as_ref() - && let Some(mut guard) = ext.specialization_init.try_write() - && let Some(init) = guard.take() - { - out.push(init.into()); + if let Some(ext) = self.heaptype_ext.as_ref() { + if let Some(mut guard) = ext.specialization_init.try_write() + && let Some(init) = guard.take() + { + out.push(init.into()); + } + if let Some(mut guard) = ext.specialization_getitem.try_write() + && let Some(getitem) = guard.take() + { + out.push(getitem.into()); + ext.specialization_getitem_version + .store(0, Ordering::Release); + } } } } @@ -860,14 +869,18 @@ impl PyType { let Some(ext) = self.heaptype_ext.as_ref() else { return false; }; - if tp_version == 0 || self.tp_version_tag.load(Ordering::Acquire) != tp_version { + if tp_version == 0 { return false; } let func_version = getitem.get_version_for_current_state(); if func_version == 0 { return false; } - *ext.specialization_getitem.write() = Some(getitem); + let mut guard = ext.specialization_getitem.write(); + if self.tp_version_tag.load(Ordering::Acquire) != tp_version { + return false; + } + *guard = Some(getitem); ext.specialization_getitem_version .store(func_version, Ordering::Release); true @@ -876,15 +889,15 @@ impl PyType { /// Read cached __getitem__ for BINARY_OP_SUBSCR_GETITEM specialization. pub(crate) fn get_cached_getitem_for_specialization(&self) -> Option<(PyRef, u32)> { let ext = self.heaptype_ext.as_ref()?; - if self.tp_version_tag.load(Ordering::Acquire) == 0 { - return None; - } let cached_version = ext.specialization_getitem_version.load(Ordering::Acquire); if cached_version == 0 { return None; } - ext.specialization_getitem - .read() + let guard = ext.specialization_getitem.read(); + if self.tp_version_tag.load(Ordering::Acquire) == 0 { + return None; + } + guard .as_ref() .map(|getitem| (getitem.to_owned(), cached_version)) } @@ -2326,7 +2339,7 @@ impl Py { #[pymethod] fn __instancecheck__(&self, obj: PyObjectRef, vm: &VirtualMachine) -> PyResult { - // Use real_is_instance to avoid infinite recursion, matching CPython's behavior + // Use real_is_instance to avoid infinite recursion obj.real_is_instance(self.as_object(), vm) } diff --git a/crates/vm/src/builtins/union.rs b/crates/vm/src/builtins/union.rs index 639ea3036e3..a8fc41151ae 100644 --- a/crates/vm/src/builtins/union.rs +++ b/crates/vm/src/builtins/union.rs @@ -51,7 +51,7 @@ impl PyUnion { }) } - /// Direct access to args field, matching CPython's _Py_union_args + /// Direct access to args field (_Py_union_args) #[inline] pub fn args(&self) -> &Py { &self.args @@ -292,8 +292,8 @@ fn dedup_and_flatten_args(args: &Py, vm: &VirtualMachine) -> PyResult = Vec::with_capacity(args.len()); diff --git a/crates/vm/src/coroutine.rs b/crates/vm/src/coroutine.rs index 51288cbd044..07158c48859 100644 --- a/crates/vm/src/coroutine.rs +++ b/crates/vm/src/coroutine.rs @@ -189,7 +189,7 @@ impl Coro { exc_tb: PyObjectRef, vm: &VirtualMachine, ) -> PyResult { - // Validate throw arguments (matching CPython _gen_throw) + // Validate throw arguments (_gen_throw) if exc_type.fast_isinstance(vm.ctx.exceptions.base_exception_type) && !vm.is_none(&exc_val) { return Err(vm.new_type_error("instance exception may not have a separate value")); diff --git a/crates/vm/src/frame.rs b/crates/vm/src/frame.rs index 74935f0fd49..3d92390ef90 100644 --- a/crates/vm/src/frame.rs +++ b/crates/vm/src/frame.rs @@ -1670,7 +1670,7 @@ impl ExecutingFrame<'_> { self.execute_bin_op(vm, op_val) } // Super-instruction for BINARY_OP_ADD_UNICODE + STORE_FAST targeting - // the left local, mirroring CPython's BINARY_OP_INPLACE_ADD_UNICODE shape. + // the left local, matching BINARY_OP_INPLACE_ADD_UNICODE shape. Instruction::BinaryOpInplaceAddUnicode => { let b = self.top_value(); let a = self.nth_value(1); @@ -4631,8 +4631,8 @@ impl ExecutingFrame<'_> { && let Some(init_func) = cls.get_cached_init_for_specialization(cached_version) && let Some(cls_alloc) = cls.slots.alloc.load() { - // CPython guards with code->co_framesize + _Py_InitCleanup.co_framesize. - // RustPython does not materialize frame-specials on datastack, so use + // co_framesize + _Py_InitCleanup.co_framesize guard. + // We do not materialize frame-specials on datastack, so use // only the cleanup shim's eval-stack payload (2 stack slots). const INIT_CLEANUP_STACK_BYTES: usize = 2 * core::mem::size_of::(); if !self.specialization_has_datastack_space_for_func_with_extra( @@ -5392,8 +5392,7 @@ impl ExecutingFrame<'_> { Instruction::LoadGlobalModule => { let oparg = u32::from(arg); let cache_base = self.lasti() as usize; - // Keep specialized opcode on guard miss, matching CPython's - // JUMP_TO_PREDICTED(LOAD_GLOBAL) behavior. + // Keep specialized opcode on guard miss (JUMP_TO_PREDICTED behavior). let cached_version = self.code.instructions.read_cache_u16(cache_base + 1); let cached_index = self.code.instructions.read_cache_u16(cache_base + 3); if let Ok(current_version) = u16::try_from(self.globals.version()) @@ -7271,7 +7270,7 @@ impl ExecutingFrame<'_> { && func.can_specialize_call(1) && !self.specialization_eval_frame_active(_vm) { - // Property specialization caches fget directly, matching CPython. + // Property specialization caches fget directly. let fget_ptr = &*fget as *const PyObject as usize; unsafe { self.write_cached_descriptor(cache_base, type_version, fget_ptr); @@ -8630,7 +8629,7 @@ impl ExecutingFrame<'_> { #[inline] fn specialization_compact_int_value(i: &PyInt, vm: &VirtualMachine) -> Option { - // CPython's _PyLong_IsCompact() means a one-digit PyLong (base 2^30), + // _PyLong_IsCompact(): a one-digit PyLong (base 2^30), // i.e. abs(value) <= 2^30 - 1. const CPYTHON_COMPACT_LONG_ABS_MAX: i64 = (1i64 << 30) - 1; let v = i.try_to_primitive::(vm).ok()?; @@ -8643,7 +8642,7 @@ impl ExecutingFrame<'_> { #[inline] fn specialization_nonnegative_compact_index(i: &PyInt, vm: &VirtualMachine) -> Option { - // CPython's _PyLong_IsNonNegativeCompact() uses a single base-2^30 digit. + // _PyLong_IsNonNegativeCompact(): a single base-2^30 digit. const CPYTHON_COMPACT_LONG_MAX: u64 = (1u64 << 30) - 1; let v = i.try_to_primitive::(vm).ok()?; if v <= CPYTHON_COMPACT_LONG_MAX { diff --git a/crates/vm/src/function/method.rs b/crates/vm/src/function/method.rs index f2affef905d..295e4d89adf 100644 --- a/crates/vm/src/function/method.rs +++ b/crates/vm/src/function/method.rs @@ -214,7 +214,7 @@ impl PyMethodDef { class: &'static Py, ) -> PyRef { debug_assert!(self.flags.contains(PyMethodFlags::STATIC)); - // Set zelf to the class, matching CPython's m_self = type for static methods. + // Set zelf to the class (m_self = type for static methods). // Callable::call skips prepending when STATIC flag is set. let func = PyNativeFunction { zelf: Some(class.to_owned().into()), diff --git a/crates/vm/src/object/core.rs b/crates/vm/src/object/core.rs index c40d204cd60..a7e5b519f9a 100644 --- a/crates/vm/src/object/core.rs +++ b/crates/vm/src/object/core.rs @@ -1674,10 +1674,9 @@ impl PyObject { } // 2. Clear dict and member slots (subtype_clear) - // Use mutable access to actually detach the dict, matching CPython's - // Py_CLEAR(*_PyObject_GetDictPtr(self)) which NULLs the dict pointer - // without clearing dict contents. This is critical because the dict - // may still be referenced by other live objects (e.g. function.__globals__). + // Detach the dict via Py_CLEAR(*_PyObject_GetDictPtr(self)) — NULL + // the pointer without clearing dict contents. The dict may still be + // referenced by other live objects (e.g. function.__globals__). if obj.0.has_ext() { let self_addr = (ptr as *const u8).addr(); let ext_ptr = core::ptr::with_exposed_provenance_mut::( diff --git a/crates/vm/src/stdlib/builtins.rs b/crates/vm/src/stdlib/builtins.rs index e634ce18428..c145c5f8a41 100644 --- a/crates/vm/src/stdlib/builtins.rs +++ b/crates/vm/src/stdlib/builtins.rs @@ -181,7 +181,7 @@ mod builtins { /// Decode source bytes to a string, handling PEP 263 encoding declarations /// and BOM. Raises SyntaxError for invalid UTF-8 without an encoding - /// declaration (matching CPython behavior). + /// declaration. /// Check if an encoding name is a UTF-8 variant after normalization. /// Matches: utf-8, utf_8, utf8, UTF-8, etc. #[cfg(feature = "parser")] diff --git a/crates/vm/src/stdlib/posix.rs b/crates/vm/src/stdlib/posix.rs index 5ecc72c7087..0deb22d6488 100644 --- a/crates/vm/src/stdlib/posix.rs +++ b/crates/vm/src/stdlib/posix.rs @@ -873,7 +873,7 @@ pub mod module { } /// Best-effort number of OS threads in this process. - /// Returns <= 0 when unavailable, mirroring CPython fallback behavior. + /// Returns <= 0 when unavailable. fn get_number_of_os_threads() -> isize { #[cfg(target_os = "macos")] { diff --git a/crates/vm/src/stdlib/sys/monitoring.rs b/crates/vm/src/stdlib/sys/monitoring.rs index 6d1aeb9c8f3..739165073af 100644 --- a/crates/vm/src/stdlib/sys/monitoring.rs +++ b/crates/vm/src/stdlib/sys/monitoring.rs @@ -777,7 +777,7 @@ fn fire( // Non-local events (RAISE, EXCEPTION_HANDLED, PY_UNWIND, etc.) // cannot be disabled per code object. if event_id >= LOCAL_EVENTS_COUNT { - // Remove the callback, matching CPython behavior. + // Remove the callback. let mut state = vm.state.monitoring.lock(); state.callbacks.remove(&(tool, event_id)); return Err(vm.new_value_error(format!( diff --git a/crates/vm/src/vm/mod.rs b/crates/vm/src/vm/mod.rs index a56bc16cd38..72899016675 100644 --- a/crates/vm/src/vm/mod.rs +++ b/crates/vm/src/vm/mod.rs @@ -253,8 +253,7 @@ impl StopTheWorldState { } /// Try to CAS detached threads directly to SUSPENDED and check whether - /// stop countdown reached zero after parking detached threads - /// (`park_detached_threads`), matching CPython behavior class. + /// stop countdown reached zero after parking detached threads. fn park_detached_threads(&self, vm: &VirtualMachine) -> bool { use thread::{THREAD_ATTACHED, THREAD_DETACHED, THREAD_SUSPENDED}; let requester = self.requester.load(Ordering::Relaxed); diff --git a/extra_tests/snippets/specialization_binary_op_inplace_add_unicode.py b/extra_tests/snippets/specialization_binary_op_inplace_add_unicode.py deleted file mode 100644 index 3e7e3e09f69..00000000000 --- a/extra_tests/snippets/specialization_binary_op_inplace_add_unicode.py +++ /dev/null @@ -1,32 +0,0 @@ -class S(str): - def __add__(self, other): - return "ADD" - - def __iadd__(self, other): - return "IADD" - - -def add_path_fallback_uses_add(): - x = "a" - y = "b" - for i in range(1200): - if i == 600: - x = S("s") - y = "t" - x = x + y - return x - - -def iadd_path_fallback_uses_iadd(): - x = "a" - y = "b" - for i in range(1200): - if i == 600: - x = S("s") - y = "t" - x += y - return x - - -assert add_path_fallback_uses_add().startswith("ADD") -assert iadd_path_fallback_uses_iadd().startswith("IADD") diff --git a/extra_tests/snippets/specialization_binary_op_subscr_str_int_ascii_cache.py b/extra_tests/snippets/specialization_binary_op_subscr_str_int_ascii_cache.py deleted file mode 100644 index b979c4285f9..00000000000 --- a/extra_tests/snippets/specialization_binary_op_subscr_str_int_ascii_cache.py +++ /dev/null @@ -1,13 +0,0 @@ -def check_ascii_subscr_singleton_after_warmup(): - s = "abc" - first = None - for i in range(4000): - c = s[0] - if i >= 3500: - if first is None: - first = c - else: - assert c is first - - -check_ascii_subscr_singleton_after_warmup() diff --git a/extra_tests/snippets/specialization_binary_op_subscr_str_int_latin1_singleton.py b/extra_tests/snippets/specialization_binary_op_subscr_str_int_latin1_singleton.py deleted file mode 100644 index 9220824b20b..00000000000 --- a/extra_tests/snippets/specialization_binary_op_subscr_str_int_latin1_singleton.py +++ /dev/null @@ -1,13 +0,0 @@ -def check_latin1_subscr_singleton_after_warmup(): - for s in ("abc", "éx"): - first = None - for i in range(5000): - c = s[0] - if i >= 4500: - if first is None: - first = c - else: - assert c is first - - -check_latin1_subscr_singleton_after_warmup() diff --git a/extra_tests/snippets/vm_specialization.py b/extra_tests/snippets/vm_specialization.py new file mode 100644 index 00000000000..0cc3b0b5f92 --- /dev/null +++ b/extra_tests/snippets/vm_specialization.py @@ -0,0 +1,68 @@ +## BinaryOp inplace-add unicode: deopt falls back to __add__/__iadd__ + +class S(str): + def __add__(self, other): + return "ADD" + + def __iadd__(self, other): + return "IADD" + + +def add_path_fallback_uses_add(): + x = "a" + y = "b" + for i in range(1200): + if i == 600: + x = S("s") + y = "t" + x = x + y + return x + + +def iadd_path_fallback_uses_iadd(): + x = "a" + y = "b" + for i in range(1200): + if i == 600: + x = S("s") + y = "t" + x += y + return x + + +assert add_path_fallback_uses_add().startswith("ADD") +assert iadd_path_fallback_uses_iadd().startswith("IADD") + + +## BINARY_SUBSCR_STR_INT: ASCII singleton identity + +def check_ascii_subscr_singleton_after_warmup(): + s = "abc" + first = None + for i in range(4000): + c = s[0] + if i >= 3500: + if first is None: + first = c + else: + assert c is first + + +check_ascii_subscr_singleton_after_warmup() + + +## BINARY_SUBSCR_STR_INT: Latin-1 singleton identity + +def check_latin1_subscr_singleton_after_warmup(): + for s in ("abc", "éx"): + first = None + for i in range(5000): + c = s[0] + if i >= 4500: + if first is None: + first = c + else: + assert c is first + + +check_latin1_subscr_singleton_after_warmup()