Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion crates/derive-impl/src/pyclass.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1021,7 +1021,7 @@ where
.iter()
.any(|arg| matches!(arg, syn::FnArg::Receiver(_)));
let drop_first_typed = match self.inner.attr_name {
AttrName::Method | AttrName::ClassMethod if !has_receiver => 1,
AttrName::Method | AttrName::ClassMethod if !has_receiver && !raw => 1,
_ => 0,
};
let call_flags = infer_native_call_flags(func.sig(), drop_first_typed);
Expand Down
85 changes: 68 additions & 17 deletions crates/vm/src/builtins/function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -529,6 +529,10 @@ impl PyFunction {
}

impl Py<PyFunction> {
/// Reports whether this function's code object carries the `OPTIMIZED`
/// code flag, which the CALL specialization checks in this file require.
pub(crate) fn is_optimized_for_call_specialization(&self) -> bool {
    let code_flags = self.code.flags;
    code_flags.contains(bytecode::CodeFlags::OPTIMIZED)
}

pub fn invoke_with_locals(
&self,
func_args: FuncArgs,
Expand Down Expand Up @@ -636,43 +640,90 @@ impl Py<PyFunction> {
new_v
}

/// Counterpart of the `function_kind(SIMPLE_FUNCTION)` test used when
/// specializing CALL.
///
/// Returns `true` only when all of the following hold for the code object:
/// - the `OPTIMIZED` flag is set,
/// - neither `VARARGS` nor `VARKEYWORDS` is set,
/// - there are no keyword-only arguments.
pub(crate) fn is_simple_for_call_specialization(&self) -> bool {
    let code: &Py<PyCode> = &self.code;

    // Keyword-only parameters rule the function out regardless of flags.
    if code.kwonlyarg_count != 0 {
        return false;
    }

    let variadic_mask = bytecode::CodeFlags::VARARGS | bytecode::CodeFlags::VARKEYWORDS;
    let is_optimized = code.flags.contains(bytecode::CodeFlags::OPTIMIZED);
    let is_variadic = code.flags.intersects(variadic_mask);
    is_optimized && !is_variadic
}

/// Check if this function is eligible for exact-args call specialization.
/// Returns true if: no VARARGS, no VARKEYWORDS, no kwonly args, not generator/coroutine,
/// Returns true if: CO_OPTIMIZED, no VARARGS, no VARKEYWORDS, no kwonly args,
/// and effective_nargs matches co_argcount.
pub(crate) fn can_specialize_call(&self, effective_nargs: u32) -> bool {
let code: &Py<PyCode> = &self.code;
let flags = code.flags;
flags.contains(bytecode::CodeFlags::NEWLOCALS)
&& !flags.intersects(
bytecode::CodeFlags::VARARGS
| bytecode::CodeFlags::VARKEYWORDS
| bytecode::CodeFlags::GENERATOR
| bytecode::CodeFlags::COROUTINE,
)
flags.contains(bytecode::CodeFlags::OPTIMIZED)
&& !flags.intersects(bytecode::CodeFlags::VARARGS | bytecode::CodeFlags::VARKEYWORDS)
&& code.kwonlyarg_count == 0
&& code.arg_count == effective_nargs
}

/// Cheap runtime guard for the CALL_*_EXACT_ARGS specializations.
///
/// Compares only the code object's positional argument count against
/// `effective_nargs`. Every other precondition is expected to have been
/// established already by function versioning and by the checks performed
/// at specialization time.
#[inline]
pub(crate) fn has_exact_argcount(&self, effective_nargs: u32) -> bool {
    let declared_argcount = self.code.arg_count;
    effective_nargs == declared_argcount
}

/// Number of bytes this function's frame needs on RustPython's thread
/// datastack.
///
/// The size is `(varnames + cellvars + freevars + max_stackdepth)` slots,
/// each `size_of::<usize>()` bytes wide.
///
/// Returns `None` when the code object is flagged `GENERATOR` or
/// `COROUTINE` (those code paths do not push a regular datastack-backed
/// frame in the fast call path), or when the size computation overflows
/// `usize`.
pub(crate) fn datastack_frame_size_bytes(&self) -> Option<usize> {
    let code: &Py<PyCode> = &self.code;

    let deferred_kinds = bytecode::CodeFlags::GENERATOR | bytecode::CodeFlags::COROUTINE;
    if code.flags.intersects(deferred_kinds) {
        return None;
    }

    let local_slots = code.varnames.len();
    let cell_slots = code.cellvars.len();
    let free_slots = code.freevars.len();
    let stack_slots = code.max_stackdepth as usize;
    let slot_width = core::mem::size_of::<usize>();

    // Every step is checked so an absurdly large code object yields `None`
    // instead of a wrapped-around size.
    local_slots
        .checked_add(cell_slots)?
        .checked_add(free_slots)?
        .checked_add(stack_slots)?
        .checked_mul(slot_width)
}

/// Fast path for calling a simple function with exact positional args.
/// Skips FuncArgs allocation, prepend_arg, and fill_locals_from_args.
/// Only valid when: no VARARGS, no VARKEYWORDS, no kwonlyargs, not generator/coroutine,
/// Only valid when: CO_OPTIMIZED, no VARARGS, no VARKEYWORDS, no kwonlyargs,
/// and nargs == co_argcount.
pub fn invoke_exact_args(&self, mut args: Vec<PyObjectRef>, vm: &VirtualMachine) -> PyResult {
let code: PyRef<PyCode> = (*self.code).to_owned();

debug_assert_eq!(args.len(), code.arg_count as usize);
debug_assert!(code.flags.contains(bytecode::CodeFlags::NEWLOCALS));
debug_assert!(!code.flags.intersects(
bytecode::CodeFlags::VARARGS
| bytecode::CodeFlags::VARKEYWORDS
| bytecode::CodeFlags::GENERATOR
| bytecode::CodeFlags::COROUTINE
));
debug_assert!(code.flags.contains(bytecode::CodeFlags::OPTIMIZED));
debug_assert!(
!code
.flags
.intersects(bytecode::CodeFlags::VARARGS | bytecode::CodeFlags::VARKEYWORDS)
);
debug_assert_eq!(code.kwonlyarg_count, 0);

// Generator/coroutine code objects are SIMPLE_FUNCTION in call
// specialization classification, but their call path must still
// go through invoke() to produce generator/coroutine objects.
if code
.flags
.intersects(bytecode::CodeFlags::GENERATOR | bytecode::CodeFlags::COROUTINE)
{
return self.invoke(FuncArgs::from(args), vm);
}

let locals = if code.flags.contains(bytecode::CodeFlags::NEWLOCALS) {
None
} else {
Some(ArgMapping::from_dict_exact(self.globals.clone()))
};

let frame = Frame::new(
code.clone(),
Scope::new(None, self.globals.clone()),
Scope::new(locals, self.globals.clone()),
self.builtins.clone(),
self.closure.as_ref().map_or(&[], |c| c.as_slice()),
Some(self.to_owned().into()),
Expand Down
18 changes: 17 additions & 1 deletion crates/vm/src/builtins/list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,16 @@ impl PyList {

fn _setitem(&self, needle: &PyObject, value: PyObjectRef, vm: &VirtualMachine) -> PyResult<()> {
match SequenceIndex::try_from_borrowed_object(vm, needle, "list")? {
SequenceIndex::Int(index) => self.borrow_vec_mut().setitem_by_index(vm, index, value),
SequenceIndex::Int(index) => self
.borrow_vec_mut()
.setitem_by_index(vm, index, value)
.map_err(|e| {
if e.class().is(vm.ctx.exceptions.index_error) {
vm.new_index_error("list assignment index out of range".to_owned())
} else {
e
}
}),
SequenceIndex::Slice(slice) => {
let sec = extract_cloned(&value, Ok, vm)?;
self.borrow_vec_mut().setitem_by_slice(vm, slice, &sec)
Expand Down Expand Up @@ -509,6 +518,13 @@ impl AsSequence for PyList {
} else {
zelf.borrow_vec_mut().delitem_by_index(vm, i)
}
.map_err(|e| {
if e.class().is(vm.ctx.exceptions.index_error) {
vm.new_index_error("list assignment index out of range".to_owned())
} else {
e
}
})
}),
contains: atomic_func!(|seq, target, vm| {
let zelf = PyList::sequence_downcast(seq);
Expand Down
33 changes: 18 additions & 15 deletions crates/vm/src/builtins/object.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,19 +64,6 @@ impl Constructor for PyBaseObject {
}
}

// more or less __new__ operator
// Only create dict if the class has HAS_DICT flag (i.e., __slots__ was not defined
// or __dict__ is in __slots__)
let dict = if cls
.slots
.flags
.has_feature(crate::types::PyTypeFlags::HAS_DICT)
{
Some(vm.ctx.new_dict())
} else {
None
};

// Ensure that all abstract methods are implemented before instantiating instance.
if let Some(abs_methods) = cls.get_attr(identifier!(vm, __abstractmethods__))
&& let Some(unimplemented_abstract_method_count) = abs_methods.length_opt(vm)
Expand Down Expand Up @@ -109,14 +96,29 @@ impl Constructor for PyBaseObject {
}
}

Ok(crate::PyRef::new_ref(Self, cls, dict).into())
generic_alloc(cls, 0, vm)
}

fn py_new(_cls: &Py<PyType>, _args: Self::Args, _vm: &VirtualMachine) -> PyResult<Self> {
unimplemented!("use slot_new")
}
}

/// Allocates a bare `PyBaseObject` instance of type `cls`.
///
/// An instance dict is created only when `cls` has the `HAS_DICT` type flag
/// (i.e. `__slots__` was not defined, or `__dict__` is listed in
/// `__slots__`); otherwise the object is created without a dict.
/// `_nitems` is accepted for the alloc-slot signature and is not used here.
pub(crate) fn generic_alloc(cls: PyTypeRef, _nitems: usize, vm: &VirtualMachine) -> PyResult {
    let wants_dict = cls
        .slots
        .flags
        .has_feature(crate::types::PyTypeFlags::HAS_DICT);
    // The closure keeps dict construction lazy: nothing is allocated for
    // types without HAS_DICT.
    let instance_dict = wants_dict.then(|| vm.ctx.new_dict());
    let object = crate::PyRef::new_ref(PyBaseObject, cls, instance_dict);
    Ok(object.into())
}

impl Initializer for PyBaseObject {
type Args = FuncArgs;

Expand Down Expand Up @@ -561,8 +563,9 @@ pub fn object_set_dict(obj: PyObjectRef, dict: PyDictRef, vm: &VirtualMachine) -
}

pub fn init(ctx: &'static Context) {
// Manually set init slot - derive macro doesn't generate extend_slots
// Manually set alloc/init slots - derive macro doesn't generate extend_slots
// for trait impl that overrides #[pyslot] method
ctx.types.object_type.slots.alloc.store(Some(generic_alloc));
ctx.types
.object_type
.slots
Expand Down
29 changes: 25 additions & 4 deletions crates/vm/src/builtins/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1500,14 +1500,25 @@ impl PyRef<PyStr> {
}

pub fn concat_in_place(&mut self, other: &Wtf8, vm: &VirtualMachine) {
// TODO: call [A]Rc::get_mut on the str to try to mutate the data in place
if other.is_empty() {
return;
}
let mut s = Wtf8Buf::with_capacity(self.byte_len() + other.len());
s.push_wtf8(self.as_ref());
s.push_wtf8(other);
*self = PyStr::from(s).into_ref(&vm.ctx);
if self.as_object().strong_count() == 1 {
// SAFETY: strong_count()==1 guarantees unique ownership of this PyStr.
// Mutating payload in place preserves semantics while avoiding PyObject reallocation.
unsafe {
let payload = self.payload() as *const PyStr as *mut PyStr;
(*payload).data = PyStr::from(s).data;
(*payload)
.hash
.store(hash::SENTINEL, atomic::Ordering::Relaxed);
}
} else {
*self = PyStr::from(s).into_ref(&vm.ctx);
}
}

pub fn try_into_utf8(self, vm: &VirtualMachine) -> PyResult<PyRef<PyUtf8Str>> {
Expand Down Expand Up @@ -1678,13 +1689,23 @@ impl ToPyObject for Wtf8Buf {

impl ToPyObject for char {
fn to_pyobject(self, vm: &VirtualMachine) -> PyObjectRef {
vm.ctx.new_str(self).into()
let cp = self as u32;
if cp <= u8::MAX as u32 {
vm.ctx.latin1_char_cache[cp as usize].clone().into()
} else {
vm.ctx.new_str(self).into()
}
}
}

impl ToPyObject for CodePoint {
fn to_pyobject(self, vm: &VirtualMachine) -> PyObjectRef {
vm.ctx.new_str(self).into()
let cp = self.to_u32();
if cp <= u8::MAX as u32 {
vm.ctx.latin1_char_cache[cp as usize].clone().into()
} else {
vm.ctx.new_str(self).into()
}
}
}

Expand Down
Loading
Loading