diff --git a/Lib/_pyio.py b/Lib/_pyio.py index 2629ed9e009..48c8f770f81 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -33,11 +33,8 @@ # Rebind for compatibility BlockingIOError = BlockingIOError -# Does io.IOBase finalizer log the exception if the close() method fails? -# The exception is ignored silently by default in release build. -_IOBASE_EMITS_UNRAISABLE = (hasattr(sys, "gettotalrefcount") or sys.flags.dev_mode) # Does open() check its 'errors' argument? -_CHECK_ERRORS = _IOBASE_EMITS_UNRAISABLE +_CHECK_ERRORS = (hasattr(sys, "gettotalrefcount") or sys.flags.dev_mode) def text_encoding(encoding, stacklevel=2): @@ -416,18 +413,12 @@ def __del__(self): if closed: return - if _IOBASE_EMITS_UNRAISABLE: - self.close() - else: - # The try/except block is in case this is called at program - # exit time, when it's possible that globals have already been - # deleted, and then the close() call might fail. Since - # there's nothing we can do about such failures and they annoy - # the end users, we suppress the traceback. - try: - self.close() - except: - pass + if dealloc_warn := getattr(self, "_dealloc_warn", None): + dealloc_warn(self) + + # If close() fails, the caller logs the exception with + # sys.unraisablehook. close() must be called at the end at __del__(). + self.close() ### Inquiries ### @@ -632,16 +623,15 @@ def read(self, size=-1): n = self.readinto(b) if n is None: return None + if n < 0 or n > len(b): + raise ValueError(f"readinto returned {n} outside buffer size {len(b)}") del b[n:] return bytes(b) def readall(self): """Read until EOF, using multiple read() call.""" res = bytearray() - while True: - data = self.read(DEFAULT_BUFFER_SIZE) - if not data: - break + while data := self.read(DEFAULT_BUFFER_SIZE): res += data if res: return bytes(res) @@ -666,8 +656,6 @@ def write(self, b): self._unsupported("write") io.RawIOBase.register(RawIOBase) -from _io import FileIO -RawIOBase.register(FileIO) class BufferedIOBase(IOBase): @@ -874,6 +862,10 @@ def __repr__(self): else: return "<{}.{} name={!r}>".format(modname, clsname, name) + def _dealloc_warn(self, source): + if dealloc_warn := getattr(self.raw, "_dealloc_warn", None): + dealloc_warn(source) + ### Lower-level APIs ### def fileno(self): @@ -1511,6 +1503,11 @@ def __init__(self, file, mode='r', closefd=True, opener=None): if isinstance(file, float): raise TypeError('integer argument expected, got float') if isinstance(file, int): + if isinstance(file, bool): + import warnings + warnings.warn("bool is used as a file descriptor", + RuntimeWarning, stacklevel=2) + file = int(file) fd = file if fd < 0: raise ValueError('negative file descriptor') @@ -1569,7 +1566,8 @@ def __init__(self, file, mode='r', closefd=True, opener=None): if not isinstance(fd, int): raise TypeError('expected integer from opener') if fd < 0: - raise OSError('Negative file descriptor') + # bpo-27066: Raise a ValueError for bad value. + raise ValueError(f'opener returned {fd}') owned_fd = fd if not noinherit_flag: os.set_inheritable(fd, False) @@ -1608,12 +1606,11 @@ def __init__(self, file, mode='r', closefd=True, opener=None): raise self._fd = fd - def __del__(self): + def _dealloc_warn(self, source): if self._fd >= 0 and self._closefd and not self.closed: import warnings - warnings.warn('unclosed file %r' % (self,), ResourceWarning, + warnings.warn(f'unclosed file {source!r}', ResourceWarning, stacklevel=2, source=self) - self.close() def __getstate__(self): raise TypeError(f"cannot pickle {self.__class__.__name__!r} object") @@ -1757,7 +1754,7 @@ def close(self): """ if not self.closed: try: - if self._closefd: + if self._closefd and self._fd >= 0: os.close(self._fd) finally: super().close() @@ -2649,6 +2646,10 @@ def readline(self, size=None): def newlines(self): return self._decoder.newlines if self._decoder else None + def _dealloc_warn(self, source): + if dealloc_warn := getattr(self.buffer, "_dealloc_warn", None): + dealloc_warn(source) + class StringIO(TextIOWrapper): """Text I/O implementation using an in-memory buffer. diff --git a/Lib/test/test_file_eintr.py b/Lib/test/test_file_eintr.py index 13260b8e498..fa9c6637fd4 100644 --- a/Lib/test/test_file_eintr.py +++ b/Lib/test/test_file_eintr.py @@ -186,15 +186,7 @@ def test_readall(self): class CTestFileIOSignalInterrupt(TestFileIOSignalInterrupt, unittest.TestCase): modname = '_io' - # TODO: RUSTPYTHON - _io module uses _pyio internally, signal handling differs - @unittest.expectedFailure - def test_readline(self): - super().test_readline() - - @unittest.expectedFailure - def test_readlines(self): - super().test_readlines() - + # TODO: RUSTPYTHON - _io.FileIO.readall uses read_to_end which differs from _pyio.FileIO.readall @unittest.expectedFailure def test_readall(self): super().test_readall() @@ -221,19 +213,6 @@ def test_readall(self): class CTestBufferedIOSignalInterrupt(TestBufferedIOSignalInterrupt, unittest.TestCase): modname = '_io' - # TODO: RUSTPYTHON - _io module uses _pyio internally, signal handling differs - @unittest.expectedFailure - def test_readline(self): - super().test_readline() - - @unittest.expectedFailure - def test_readlines(self): - super().test_readlines() - - @unittest.expectedFailure - def test_readall(self): - super().test_readall() - class PyTestBufferedIOSignalInterrupt(TestBufferedIOSignalInterrupt, unittest.TestCase): modname = '_pyio' @@ -273,19 +252,6 @@ def test_readall(self): class CTestTextIOSignalInterrupt(TestTextIOSignalInterrupt, unittest.TestCase): modname = '_io' - # TODO: RUSTPYTHON - _io module uses _pyio internally, signal handling differs - @unittest.expectedFailure - def test_readline(self): - super().test_readline() - - @unittest.expectedFailure - def test_readlines(self): - super().test_readlines() - - @unittest.expectedFailure - def test_readall(self): - super().test_readall() - class PyTestTextIOSignalInterrupt(TestTextIOSignalInterrupt, unittest.TestCase): modname = '_pyio' diff --git a/Lib/test/test_fileio.py b/Lib/test/test_fileio.py index edc29b34d54..fdb36ed997d 100644 --- a/Lib/test/test_fileio.py +++ b/Lib/test/test_fileio.py @@ -363,27 +363,6 @@ class CAutoFileTests(AutoFileTests, unittest.TestCase): FileIO = _io.FileIO modulename = '_io' - @unittest.expectedFailure # TODO: RUSTPYTHON - def testBlksize(self): - return super().testBlksize() - - @unittest.expectedFailureIfWindows("TODO: RUSTPYTHON") - def testErrnoOnClosedTruncate(self): - return super().testErrnoOnClosedTruncate() - - @unittest.expectedFailure # TODO: RUSTPYTHON - def testMethods(self): - return super().testMethods() - - @unittest.expectedFailure # TODO: RUSTPYTHON - def testOpenDirFD(self): - return super().testOpenDirFD() - - @unittest.expectedFailure # TODO: RUSTPYTHON - def test_subclass_repr(self): - return super().test_subclass_repr() - -@unittest.skipIf(sys.platform == "win32", "TODO: RUSTPYTHON, test setUp errors on Windows") class PyAutoFileTests(AutoFileTests, unittest.TestCase): FileIO = _pyio.FileIO modulename = '_pyio' @@ -506,7 +485,6 @@ def testInvalidFd(self): import msvcrt self.assertRaises(OSError, msvcrt.get_osfhandle, make_bad_fd()) - @unittest.expectedFailure # TODO: RUSTPYTHON def testBooleanFd(self): for fd in False, True: with self.assertWarnsRegex(RuntimeWarning, @@ -634,10 +612,6 @@ def test_open_code(self): actual = f.read() self.assertEqual(expected, actual) - @unittest.expectedFailure # TODO: RUSTPYTHON - def testUnclosedFDOnException(self): - return super().testUnclosedFDOnException() - class PyOtherFileTests(OtherFileTests, unittest.TestCase): FileIO = _pyio.FileIO diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index 0142208427b..2cf84d9aae9 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -871,6 +871,22 @@ def test_RawIOBase_read(self): self.assertEqual(rawio.read(2), None) self.assertEqual(rawio.read(2), b"") + def test_RawIOBase_read_bounds_checking(self): + # Make sure a `.readinto` call which returns a value outside + # (0, len(buffer)) raises. + class Misbehaved(self.RawIOBase): + def __init__(self, readinto_return) -> None: + self._readinto_return = readinto_return + def readinto(self, b): + return self._readinto_return + + with self.assertRaises(ValueError) as cm: + Misbehaved(2).read(1) + self.assertEqual(str(cm.exception), "readinto returned 2 outside buffer size 1") + for bad_size in (2147483647, sys.maxsize, -1, -1000): + with self.assertRaises(ValueError): + Misbehaved(bad_size).read() + def test_types_have_dict(self): test = ( self.IOBase(), @@ -1204,14 +1220,6 @@ def test_stringio_setstate(self): class PyIOTest(IOTest): pass - @unittest.expectedFailure # TODO: RUSTPYTHON; OSError: Negative file descriptor - def test_bad_opener_negative_1(): - return super().test_bad_opener_negative_1() - - @unittest.expectedFailure # TODO: RUSTPYTHON; OSError: Negative file descriptor - def test_bad_opener_other_negative(): - return super().test_bad_opener_other_negative() - @support.cpython_only class APIMismatchTest(unittest.TestCase): @@ -1288,7 +1296,6 @@ def _with(): # a ValueError. self.assertRaises(ValueError, _with) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_error_through_destructor(self): # Test that the exception state is not modified by a destructor, # even if close() fails. @@ -1811,6 +1818,10 @@ def test_garbage_collection(self): support.gc_collect() self.assertIsNone(wr(), wr) + @unittest.expectedFailure # TODO: RUSTPYTHON + def test_error_through_destructor(self): + return super().test_error_through_destructor() + def test_args_error(self): # Issue #17275 with self.assertRaisesRegex(TypeError, "BufferedReader"): @@ -1824,7 +1835,6 @@ def test_bad_readinto_value(self): bufio.readline() self.assertIsNone(cm.exception.__cause__) - @unittest.expectedFailure # TODO: RUSTPYTHON; TypeError: 'bytes' object cannot be interpreted as an integer") def test_bad_readinto_type(self): rawio = self.tp(self.BytesIO(b"12")) rawio.readinto = lambda buf: b'' @@ -1841,7 +1851,6 @@ def test_flush_error_on_close(self): def test_seek_character_device_file(self): return super().test_seek_character_device_file() - @unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: UnsupportedOperation not raised by truncate def test_truncate_on_read_only(self): return super().test_truncate_on_read_only() @@ -1961,7 +1970,6 @@ def _seekrel(bufio): def test_writes_and_truncates(self): self.check_writes(lambda bufio: bufio.truncate(bufio.tell())) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_write_non_blocking(self): raw = self.MockNonBlockWriterIO() bufio = self.tp(raw, 8) @@ -2158,6 +2166,10 @@ def test_slow_close_from_thread(self): class CBufferedWriterTest(BufferedWriterTest, SizeofTest): tp = io.BufferedWriter + @unittest.expectedFailure # TODO: RUSTPYTHON + def test_error_through_destructor(self): + return super().test_error_through_destructor() + def test_initialization(self): rawio = self.MockRawIO() bufio = self.tp(rawio) @@ -2680,6 +2692,10 @@ def test_interleaved_readline_write(self): class CBufferedRandomTest(BufferedRandomTest, SizeofTest): tp = io.BufferedRandom + @unittest.expectedFailure # TODO: RUSTPYTHON + def test_error_through_destructor(self): + return super().test_error_through_destructor() + @unittest.skipIf(sys.platform == 'win32', 'TODO: RUSTPYTHON; cyclic GC not supported, causes file locking') @unittest.expectedFailure # TODO: RUSTPYTHON def test_garbage_collection(self): @@ -3205,7 +3221,6 @@ def flush(self): support.gc_collect() self.assertEqual(record, [1, 2, 3]) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_error_through_destructor(self): # Test that the exception state is not modified by a destructor, # even if close() fails. @@ -4115,6 +4130,10 @@ class CTextIOWrapperTest(TextIOWrapperTest): io = io shutdown_error = "LookupError: unknown encoding: ascii" + @unittest.expectedFailure # TODO: RUSTPYTHON + def test_error_through_destructor(self): + return super().test_error_through_destructor() + @unittest.expectedFailure # TODO: RUSTPYTHON def test_initialization(self): r = self.BytesIO(b"\xc3\xa9\n\n") @@ -4276,7 +4295,6 @@ def test_reconfigure_write_through(self): def test_repr(self): return super().test_repr() - @unittest.expectedFailure # TODO: RUSTPYTHON def test_uninitialized(self): return super().test_uninitialized() @@ -4596,7 +4614,6 @@ def _check_warn_on_dealloc(self, *args, **kwargs): support.gc_collect() self.assertIn(r, str(cm.warning.args[0])) - @unittest.expectedFailure # TODO: RUSTPYTHON def test_warn_on_dealloc(self): self._check_warn_on_dealloc(os_helper.TESTFN, "wb", buffering=0) self._check_warn_on_dealloc(os_helper.TESTFN, "wb") @@ -4621,7 +4638,6 @@ def cleanup_fds(): with warnings_helper.check_no_resource_warning(self): self.open(r, *args, closefd=False, **kwargs) - @unittest.expectedFailure # TODO: RUSTPYTHON @unittest.skipUnless(hasattr(os, "pipe"), "requires os.pipe()") def test_warn_on_dealloc_fd(self): self._check_warn_on_dealloc_fd("rb", buffering=0) @@ -4651,14 +4667,12 @@ def test_pickling(self): with self.assertRaisesRegex(TypeError, msg): pickle.dumps(f, protocol) - @unittest.expectedFailure # TODO: RUSTPYTHON @unittest.skipIf( support.is_emscripten, "fstat() of a pipe fd is not supported" ) def test_nonblock_pipe_write_bigbuf(self): self._test_nonblock_pipe_write(16*1024) - @unittest.expectedFailure # TODO: RUSTPYTHON @unittest.skipIf( support.is_emscripten, "fstat() of a pipe fd is not supported" ) @@ -4822,6 +4836,22 @@ class CMiscIOTest(MiscIOTest): name_of_module = "io", "_io" extra_exported = "BlockingIOError", + @unittest.expectedFailure # TODO: RUSTPYTHON; BufferedWriter seeks on non-seekable pipe + def test_nonblock_pipe_write_bigbuf(self): + return super().test_nonblock_pipe_write_bigbuf() + + @unittest.expectedFailure # TODO: RUSTPYTHON; BufferedWriter seeks on non-seekable pipe + def test_nonblock_pipe_write_smallbuf(self): + return super().test_nonblock_pipe_write_smallbuf() + + @unittest.expectedFailure # TODO: RUSTPYTHON + def test_warn_on_dealloc(self): + return super().test_warn_on_dealloc() + + @unittest.expectedFailure # TODO: RUSTPYTHON + def test_warn_on_dealloc_fd(self): + return super().test_warn_on_dealloc_fd() + def test_readinto_buffer_overflow(self): # Issue #18025 class BadReader(self.io.BufferedIOBase): diff --git a/Lib/test/test_subprocess.py b/Lib/test/test_subprocess.py index 3917c0a76d9..5c5d2a9600f 100644 --- a/Lib/test/test_subprocess.py +++ b/Lib/test/test_subprocess.py @@ -1554,8 +1554,6 @@ def test_communicate_epipe_only_stdin(self): p.wait() p.communicate(b"x" * 2**20) - # TODO: RUSTPYTHON - @unittest.expectedFailure @unittest.skipUnless(hasattr(signal, 'SIGUSR1'), "Requires signal.SIGUSR1") @unittest.skipUnless(hasattr(os, 'kill'), diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index 635a1c1c85a..4cf233d5f8b 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -1050,19 +1050,15 @@ def _test_sparse_file(self, name): s = os.stat(filename) self.assertLess(s.st_blocks * 512, s.st_size) - @unittest.expectedFailureIf(sys.platform == "linux", "TODO: RUSTPYTHON") def test_sparse_file_old(self): self._test_sparse_file("gnu/sparse") - @unittest.expectedFailureIf(sys.platform == "linux", "TODO: RUSTPYTHON") def test_sparse_file_00(self): self._test_sparse_file("gnu/sparse-0.0") - @unittest.expectedFailureIf(sys.platform == "linux", "TODO: RUSTPYTHON") def test_sparse_file_01(self): self._test_sparse_file("gnu/sparse-0.1") - @unittest.expectedFailureIf(sys.platform == "linux", "TODO: RUSTPYTHON") def test_sparse_file_10(self): self._test_sparse_file("gnu/sparse-1.0") diff --git a/crates/common/src/crt_fd.rs b/crates/common/src/crt_fd.rs index 7b8279adbe3..b873ef9c52c 100644 --- a/crates/common/src/crt_fd.rs +++ b/crates/common/src/crt_fd.rs @@ -334,7 +334,21 @@ pub fn close(fd: Owned) -> io::Result<()> { } pub fn ftruncate(fd: Borrowed<'_>, len: Offset) -> io::Result<()> { - cvt(unsafe { suppress_iph!(c::ftruncate(fd.as_raw(), len)) })?; + let ret = unsafe { suppress_iph!(c::ftruncate(fd.as_raw(), len)) }; + // On Windows, _chsize_s returns 0 on success, or a positive error code (errno value) on failure. + // On other platforms, ftruncate returns 0 on success, or -1 on failure with errno set. + #[cfg(windows)] + { + if ret != 0 { + // _chsize_s returns errno directly, convert to Windows error code + let winerror = crate::os::errno_to_winerror(ret); + return Err(io::Error::from_raw_os_error(winerror)); + } + } + #[cfg(not(windows))] + { + cvt(ret)?; + } Ok(()) } diff --git a/crates/derive-impl/src/pystructseq.rs b/crates/derive-impl/src/pystructseq.rs index 6c34844696d..2ebb05075e4 100644 --- a/crates/derive-impl/src/pystructseq.rs +++ b/crates/derive-impl/src/pystructseq.rs @@ -22,6 +22,8 @@ enum FieldKind { struct ParsedField { ident: Ident, kind: FieldKind, + /// Optional cfg attributes for conditional compilation + cfg_attrs: Vec, } /// Parsed field info from struct @@ -31,27 +33,24 @@ struct FieldInfo { } impl FieldInfo { - fn named_fields(&self) -> Vec { + fn named_fields(&self) -> Vec<&ParsedField> { self.fields .iter() .filter(|f| f.kind == FieldKind::Named) - .map(|f| f.ident.clone()) .collect() } - fn visible_fields(&self) -> Vec { + fn visible_fields(&self) -> Vec<&ParsedField> { self.fields .iter() .filter(|f| f.kind != FieldKind::Skipped) - .map(|f| f.ident.clone()) .collect() } - fn skipped_fields(&self) -> Vec { + fn skipped_fields(&self) -> Vec<&ParsedField> { self.fields .iter() .filter(|f| f.kind == FieldKind::Skipped) - .map(|f| f.ident.clone()) .collect() } @@ -82,8 +81,15 @@ fn parse_fields(input: &mut DeriveInput) -> Result { let mut skip = false; let mut unnamed = false; let mut attrs_to_remove = Vec::new(); + let mut cfg_attrs = Vec::new(); for (i, attr) in field.attrs.iter().enumerate() { + // Collect cfg attributes for conditional compilation + if attr.path().is_ident("cfg") { + cfg_attrs.push(attr.clone()); + continue; + } + if !attr.path().is_ident("pystruct_sequence") { continue; } @@ -135,7 +141,11 @@ fn parse_fields(input: &mut DeriveInput) -> Result { FieldKind::Named }; - parsed_fields.push(ParsedField { ident, kind }); + parsed_fields.push(ParsedField { + ident, + kind, + cfg_attrs, + }); } Ok(FieldInfo { @@ -194,18 +204,91 @@ pub(crate) fn impl_pystruct_sequence_data( let skipped_fields = field_info.skipped_fields(); let n_unnamed_fields = field_info.n_unnamed_fields(); - // Generate field index constants for visible fields + // Generate field index constants for visible fields (with cfg guards) let field_indices: Vec<_> = visible_fields .iter() .enumerate() .map(|(i, field)| { - let const_name = format_ident!("{}_INDEX", field.to_string().to_uppercase()); + let const_name = format_ident!("{}_INDEX", field.ident.to_string().to_uppercase()); + let cfg_attrs = &field.cfg_attrs; quote! { + #(#cfg_attrs)* pub const #const_name: usize = #i; } }) .collect(); + // Generate field name entries with cfg guards for named fields + let named_field_names: Vec<_> = named_fields + .iter() + .map(|f| { + let ident = &f.ident; + let cfg_attrs = &f.cfg_attrs; + if cfg_attrs.is_empty() { + quote! { stringify!(#ident), } + } else { + quote! { + #(#cfg_attrs)* + { stringify!(#ident) }, + } + } + }) + .collect(); + + // Generate field name entries with cfg guards for skipped fields + let skipped_field_names: Vec<_> = skipped_fields + .iter() + .map(|f| { + let ident = &f.ident; + let cfg_attrs = &f.cfg_attrs; + if cfg_attrs.is_empty() { + quote! { stringify!(#ident), } + } else { + quote! { + #(#cfg_attrs)* + { stringify!(#ident) }, + } + } + }) + .collect(); + + // Generate into_tuple items with cfg guards + let visible_tuple_items: Vec<_> = visible_fields + .iter() + .map(|f| { + let ident = &f.ident; + let cfg_attrs = &f.cfg_attrs; + if cfg_attrs.is_empty() { + quote! { + ::rustpython_vm::convert::ToPyObject::to_pyobject(self.#ident, vm), + } + } else { + quote! { + #(#cfg_attrs)* + { ::rustpython_vm::convert::ToPyObject::to_pyobject(self.#ident, vm) }, + } + } + }) + .collect(); + + let skipped_tuple_items: Vec<_> = skipped_fields + .iter() + .map(|f| { + let ident = &f.ident; + let cfg_attrs = &f.cfg_attrs; + if cfg_attrs.is_empty() { + quote! { + ::rustpython_vm::convert::ToPyObject::to_pyobject(self.#ident, vm), + } + } else { + quote! { + #(#cfg_attrs)* + { ::rustpython_vm::convert::ToPyObject::to_pyobject(self.#ident, vm) }, + } + } + }) + .collect(); + // Generate TryFromObject impl only when try_from_object=true let try_from_object_impl = if try_from_object { let n_required = visible_fields.len(); @@ -233,6 +316,8 @@ pub(crate) fn impl_pystruct_sequence_data( // Generate try_from_elements trait override only when try_from_object=true let try_from_elements_trait_override = if try_from_object { + let visible_field_idents: Vec<_> = visible_fields.iter().map(|f| &f.ident).collect(); + let skipped_field_idents: Vec<_> = skipped_fields.iter().map(|f| &f.ident).collect(); quote! { fn try_from_elements( elements: Vec<::rustpython_vm::PyObjectRef>, @@ -240,8 +325,8 @@ pub(crate) fn impl_pystruct_sequence_data( ) -> ::rustpython_vm::PyResult { let mut iter = elements.into_iter(); Ok(Self { - #(#visible_fields: iter.next().unwrap().clone().try_into_value(vm)?,)* - #(#skipped_fields: match iter.next() { + #(#visible_field_idents: iter.next().unwrap().clone().try_into_value(vm)?,)* + #(#skipped_field_idents: match iter.next() { Some(v) => v.clone().try_into_value(vm)?, None => vm.ctx.none(), },)* @@ -259,20 +344,14 @@ pub(crate) fn impl_pystruct_sequence_data( // PyStructSequenceData trait impl impl ::rustpython_vm::types::PyStructSequenceData for #data_ident { - const REQUIRED_FIELD_NAMES: &'static [&'static str] = &[#(stringify!(#named_fields),)*]; - const OPTIONAL_FIELD_NAMES: &'static [&'static str] = &[#(stringify!(#skipped_fields),)*]; + const REQUIRED_FIELD_NAMES: &'static [&'static str] = &[#(#named_field_names)*]; + const OPTIONAL_FIELD_NAMES: &'static [&'static str] = &[#(#skipped_field_names)*]; const UNNAMED_FIELDS_LEN: usize = #n_unnamed_fields; fn into_tuple(self, vm: &::rustpython_vm::VirtualMachine) -> ::rustpython_vm::builtins::PyTuple { let items = vec![ - #(::rustpython_vm::convert::ToPyObject::to_pyobject( - self.#visible_fields, - vm, - ),)* - #(::rustpython_vm::convert::ToPyObject::to_pyobject( - self.#skipped_fields, - vm, - ),)* + #(#visible_tuple_items)* + #(#skipped_tuple_items)* ]; ::rustpython_vm::builtins::PyTuple::new_unchecked(items.into_boxed_slice()) } diff --git a/crates/vm/src/exceptions.rs b/crates/vm/src/exceptions.rs index 0e2051e5498..4e8b572e457 100644 --- a/crates/vm/src/exceptions.rs +++ b/crates/vm/src/exceptions.rs @@ -1592,6 +1592,9 @@ pub(super) mod types { filename2: PyAtomicRef>, #[cfg(windows)] winerror: PyAtomicRef>, + // For BlockingIOError: characters written before blocking occurred + // -1 means not set (AttributeError when accessed) + written: AtomicCell, } impl crate::class::PySubclass for PyOSError { @@ -1666,6 +1669,7 @@ pub(super) mod types { filename2: filename2.into(), #[cfg(windows)] winerror: None.into(), + written: AtomicCell::new(-1), }) } @@ -1707,8 +1711,14 @@ pub(super) mod types { #[allow(deprecated)] let exc: &Py = zelf.downcast_ref::().unwrap(); + // Check if this is BlockingIOError - need to handle characters_written + let is_blocking_io_error = + zelf.class() + .is(vm.ctx.exceptions.blocking_io_error.as_ref()); + // SAFETY: slot_init is called during object initialization, // so fields are None and swap result can be safely ignored + let mut set_filename = true; if len <= 5 { // Only set errno/strerror when args len is 2-5 if 2 <= len { @@ -1716,7 +1726,22 @@ pub(super) mod types { let _ = unsafe { exc.strerror.swap(Some(new_args.args[1].clone())) }; } if 3 <= len { - let _ = unsafe { exc.filename.swap(Some(new_args.args[2].clone())) }; + let third_arg = &new_args.args[2]; + // BlockingIOError's 3rd argument can be the number of characters written + if is_blocking_io_error + && !vm.is_none(third_arg) + && crate::protocol::PyNumber::check(third_arg) + && let Ok(written) = third_arg.try_index(vm) + && let Ok(n) = written.try_to_primitive::(vm) + { + exc.written.store(n); + set_filename = false; + // Clear filename that was set in py_new + let _ = unsafe { exc.filename.swap(None) }; + } + if set_filename { + let _ = unsafe { exc.filename.swap(Some(third_arg.clone())) }; + } } #[cfg(windows)] if 4 <= len { @@ -1938,6 +1963,47 @@ pub(super) mod types { fn set_winerror(&self, value: Option, vm: &VirtualMachine) { self.winerror.swap_to_temporary_refs(value, vm); } + + #[pygetset] + fn characters_written(&self, vm: &VirtualMachine) -> PyResult { + let written = self.written.load(); + if written == -1 { + Err(vm.new_attribute_error("characters_written".to_owned())) + } else { + Ok(written) + } + } + + #[pygetset(setter)] + fn set_characters_written( + &self, + value: Option, + vm: &VirtualMachine, + ) -> PyResult<()> { + match value { + None => { + // Deleting the attribute + if self.written.load() == -1 { + Err(vm.new_attribute_error("characters_written".to_owned())) + } else { + self.written.store(-1); + Ok(()) + } + } + Some(v) => { + let n = v + .try_index(vm)? + .try_to_primitive::(vm) + .map_err(|_| { + vm.new_value_error( + "cannot convert characters_written value to isize".to_owned(), + ) + })?; + self.written.store(n); + Ok(()) + } + } + } } #[pyexception(name, base = PyOSError, ctx = "blocking_io_error", impl)] diff --git a/crates/vm/src/stdlib/io.rs b/crates/vm/src/stdlib/io.rs index a02965797bc..0167df2bed4 100644 --- a/crates/vm/src/stdlib/io.rs +++ b/crates/vm/src/stdlib/io.rs @@ -9,6 +9,15 @@ cfg_if::cfg_if! { } } +// EAGAIN constant for BlockingIOError +cfg_if::cfg_if! { + if #[cfg(any(not(target_arch = "wasm32"), target_os = "wasi"))] { + const EAGAIN: i32 = libc::EAGAIN; + } else { + const EAGAIN: i32 = 11; // Standard POSIX value + } +} + use crate::{ PyObjectRef, PyRef, PyResult, TryFromObject, VirtualMachine, builtins::{PyBaseExceptionRef, PyModule}, @@ -159,6 +168,7 @@ mod _io { }, common::wtf8::{Wtf8, Wtf8Buf}, convert::ToPyObject, + exceptions::cstring_error, function::{ ArgBytesLike, ArgIterable, ArgMemoryBuffer, ArgSize, Either, FuncArgs, IntoFuncArgs, OptionalArg, OptionalOption, PySetterValue, @@ -169,6 +179,7 @@ mod _io { recursion::ReprGuard, types::{ Callable, Constructor, DefaultConstructor, Destructor, Initializer, IterNext, Iterable, + Representable, }, vm::VirtualMachine, }; @@ -202,6 +213,35 @@ mod _io { } } + /// Check if an error is an OSError with errno == EINTR. + /// If so, call check_signals() and return Ok(None) to indicate retry. + /// Otherwise, return Ok(Some(val)) for success or Err for other errors. + /// This mirrors CPythons _PyIO_trap_eintr() pattern. + #[cfg(any(not(target_arch = "wasm32"), target_os = "wasi"))] + fn trap_eintr(result: PyResult, vm: &VirtualMachine) -> PyResult> { + match result { + Ok(val) => Ok(Some(val)), + Err(exc) => { + // Check if its an OSError with errno == EINTR + if exc.fast_isinstance(vm.ctx.exceptions.os_error) + && let Ok(errno_attr) = exc.as_object().get_attr("errno", vm) + && let Ok(errno_val) = i32::try_from_object(vm, errno_attr) + && errno_val == libc::EINTR + { + vm.check_signals()?; + return Ok(None); + } + Err(exc) + } + } + } + + /// WASM version: no EINTR handling needed + #[cfg(all(target_arch = "wasm32", not(target_os = "wasi")))] + fn trap_eintr(result: PyResult, _vm: &VirtualMachine) -> PyResult> { + result.map(Some) + } + pub fn new_unsupported_operation(vm: &VirtualMachine, msg: String) -> PyBaseExceptionRef { vm.new_os_subtype_error(unsupported_operation().to_owned(), None, msg) .upcast() @@ -650,17 +690,26 @@ mod _io { if let Some(size) = size.to_usize() { // FIXME: unnecessary zero-init let b = PyByteArray::from(vec![0; size]).into_ref(&vm.ctx); - let n = >::try_from_object( + let n = >::try_from_object( vm, vm.call_method(&instance, "readinto", (b.clone(),))?, )?; - Ok(n.map(|n| { - let mut bytes = b.borrow_buf_mut(); - bytes.truncate(n); - // FIXME: try to use Arc::unwrap on the bytearray to get at the inner buffer - bytes.clone() + Ok(match n { + None => vm.ctx.none(), + Some(n) => { + // Validate the return value is within bounds + if n < 0 || (n as usize) > size { + return Err(vm.new_value_error(format!( + "readinto returned {n} outside buffer size {size}" + ))); + } + let n = n as usize; + let mut bytes = b.borrow_buf_mut(); + bytes.truncate(n); + // FIXME: try to use Arc::unwrap on the bytearray to get at the inner buffer + bytes.clone().to_pyobject(vm) + } }) - .to_pyobject(vm)) } else { vm.call_method(&instance, "readall", ()) } @@ -671,7 +720,14 @@ mod _io { let mut chunks = Vec::new(); let mut total_len = 0; loop { - let data = vm.call_method(&instance, "read", (DEFAULT_BUFFER_SIZE,))?; + // Loop with EINTR handling (PEP 475) + let data = loop { + let res = vm.call_method(&instance, "read", (DEFAULT_BUFFER_SIZE,)); + match trap_eintr(res, vm)? { + Some(val) => break val, + None => continue, + } + }; let data = >::try_from_object(vm, data)?; match data { None => { @@ -885,12 +941,20 @@ mod _io { while self.write_pos < self.write_end { let n = self.raw_write(None, self.write_pos as usize..self.write_end as usize, vm)?; - let n = n.ok_or_else(|| { - vm.new_exception_msg( - vm.ctx.exceptions.blocking_io_error.to_owned(), - "write could not complete without blocking".to_owned(), - ) - })?; + let n = match n { + Some(n) => n, + None => { + // BlockingIOError(errno, msg, characters_written=0) + return Err(vm.invoke_exception( + vm.ctx.exceptions.blocking_io_error.to_owned(), + vec![ + vm.new_pyobj(EAGAIN), + vm.new_pyobj("write could not complete without blocking"), + vm.new_pyobj(0), + ], + )?); + } + }; self.write_pos += n as Offset; self.raw_pos = self.write_pos; vm.check_signals()?; @@ -1082,15 +1146,16 @@ mod _io { let buffer_size = self.buffer.len() as _; self.adjust_position(buffer_size); self.write_end = buffer_size; - // TODO: BlockingIOError(errno, msg, written) - // written += self.buffer.len(); - return Err(vm - .new_os_subtype_error( - vm.ctx.exceptions.blocking_io_error.to_owned(), - None, - "write could not complete without blocking".to_owned(), - ) - .upcast()); + // BlockingIOError(errno, msg, characters_written) + let chars_written = written + buffer_len; + return Err(vm.invoke_exception( + vm.ctx.exceptions.blocking_io_error.to_owned(), + vec![ + vm.new_pyobj(EAGAIN), + vm.new_pyobj("write could not complete without blocking"), + vm.new_pyobj(chars_written), + ], + )?); } else { break; } @@ -1156,7 +1221,7 @@ mod _io { } }; } - while remaining > 0 { + while remaining > 0 && !self.buffer.is_empty() { // MINUS_LAST_BLOCK() in CPython let r = self.buffer.len() * (remaining / self.buffer.len()); if r == 0 { @@ -1227,30 +1292,60 @@ mod _io { )? .into_ref(&vm.ctx); - // TODO: loop if readinto() raises an interrupt - let res = - vm.call_method(self.raw.as_ref().unwrap(), "readinto", (mem_obj.clone(),)); + // Loop if readinto() raises EINTR (PEP 475) + let res = loop { + let res = vm.call_method( + self.raw.as_ref().unwrap(), + "readinto", + (mem_obj.clone(),), + ); + match trap_eintr(res, vm) { + Ok(Some(val)) => break Ok(val), + Ok(None) => continue, // EINTR, retry + Err(e) => break Err(e), + } + }; mem_obj.release(); + // Always restore the buffer, even if an error occurred *v = read_buf.take(); res? } Either::B(buf) => { - let mem_obj = PyMemoryView::from_buffer_range(buf, buf_range, vm)?; - // TODO: loop if readinto() raises an interrupt - vm.call_method(self.raw.as_ref().unwrap(), "readinto", (mem_obj,))? + let mem_obj = + PyMemoryView::from_buffer_range(buf, buf_range, vm)?.into_ref(&vm.ctx); + // Loop if readinto() raises EINTR (PEP 475) + loop { + let res = vm.call_method( + self.raw.as_ref().unwrap(), + "readinto", + (mem_obj.clone(),), + ); + match trap_eintr(res, vm)? { + Some(val) => break val, + None => continue, + } + } } }; if vm.is_none(&res) { return Ok(None); } - let n = isize::try_from_object(vm, res)?; + // Try to convert to int; if it fails, treat as -1 and chain the TypeError + let (n, type_error) = match isize::try_from_object(vm, res.clone()) { + Ok(n) => (n, None), + Err(e) => (-1, Some(e)), + }; if n < 0 || n as usize > len { - return Err(vm.new_os_error(format!( + let os_error = vm.new_os_error(format!( "raw readinto() returned invalid length {n} (should have been between 0 and {len})" - ))); + )); + if let Some(cause) = type_error { + os_error.set___cause__(Some(cause)); + } + return Err(os_error); } if n > 0 && self.abs_pos != -1 { self.abs_pos += n as Offset @@ -1293,7 +1388,14 @@ mod _io { let mut read_size = 0; loop { - let read_data = vm.call_method(self.raw.as_ref().unwrap(), "read", ())?; + // Loop with EINTR handling (PEP 475) + let read_data = loop { + let res = vm.call_method(self.raw.as_ref().unwrap(), "read", ()); + match trap_eintr(res, vm)? { + Some(val) => break val, + None => continue, + } + }; let read_data = >::try_from_object(vm, read_data)?; match read_data { @@ -1564,9 +1666,10 @@ mod _io { let pos = pos.flatten().to_pyobject(vm); let mut data = zelf.lock(vm)?; data.check_init(vm)?; - if data.writable() { - data.flush_rewind(vm)?; + if !data.writable() { + return Err(new_unsupported_operation(vm, "truncate".to_owned())); } + data.flush_rewind(vm)?; let res = vm.call_method(data.raw.as_ref().unwrap(), "truncate", (pos,))?; let _ = data.raw_tell(vm); Ok(res) @@ -2400,7 +2503,13 @@ mod _io { None if vm.state.config.settings.utf8_mode > 0 => { identifier_utf8!(vm, utf_8).to_owned() } - Some(enc) if enc.as_str() != "locale" => enc, + Some(enc) if enc.as_str() != "locale" => { + // Check for embedded null character + if enc.as_str().contains('\0') { + return Err(cstring_error(vm)); + } + enc + } _ => { // None without utf8_mode or "locale" encoding vm.import("locale", 0)? @@ -2414,6 +2523,11 @@ mod _io { .errors .unwrap_or_else(|| identifier!(vm, strict).to_owned()); + // Check for embedded null character in errors (use as_wtf8 to handle surrogates) + if errors.as_wtf8().as_bytes().contains(&0) { + return Err(cstring_error(vm)); + } + let has_read1 = vm.get_attribute_opt(buffer.clone(), "read1")?.is_some(); let seekable = vm.call_method(&buffer, "seekable", ())?.try_to_bool(vm)?; @@ -2519,7 +2633,14 @@ mod _io { } #[pyclass( - with(Constructor, Initializer, Destructor, Iterable, IterNext), + with( + Constructor, + Initializer, + Destructor, + Iterable, + IterNext, + Representable + ), flags(BASETYPE) )] impl TextIOWrapper { @@ -3396,6 +3517,46 @@ mod _io { } } + impl Representable for TextIOWrapper { + #[inline] + fn repr_str(zelf: &Py, vm: &VirtualMachine) -> PyResult { + let type_name = zelf.class().slot_name(); + let Some(data) = zelf.data.lock() else { + // Reentrant call + return Ok(format!("<{type_name}>")); + }; + let Some(data) = data.as_ref() else { + return Err(vm.new_value_error("I/O operation on uninitialized object".to_owned())); + }; + + let mut result = format!("<{type_name}"); + + // Add name if present + if let Ok(Some(name)) = vm.get_attribute_opt(data.buffer.clone(), "name") + && let Ok(name_repr) = name.repr(vm) + { + result.push_str(" name="); + result.push_str(name_repr.as_str()); + } + + // Add mode if present + if let Ok(Some(mode)) = vm.get_attribute_opt(data.buffer.clone(), "mode") + && let Ok(mode_repr) = mode.repr(vm) + { + result.push_str(" mode="); + result.push_str(mode_repr.as_str()); + } + + // Add encoding + result.push_str(" encoding='"); + result.push_str(data.encoding.as_str()); + result.push('\''); + + result.push('>'); + Ok(result) + } + } + impl Iterable for TextIOWrapper { fn slot_iter(zelf: PyObjectRef, vm: &VirtualMachine) -> PyResult { check_closed(&zelf, vm)?; @@ -4465,7 +4626,7 @@ mod fileio { } #[pyattr] - #[pyclass(module = "io", name, base = _RawIOBase)] + #[pyclass(module = "_io", name, base = _RawIOBase)] #[derive(Debug)] pub(super) struct FileIO { _base: _RawIOBase, @@ -4473,6 +4634,7 @@ mod fileio { closefd: AtomicCell, mode: AtomicCell, seekable: AtomicCell>, + blksize: AtomicCell, } #[derive(FromArgs)] @@ -4495,6 +4657,7 @@ mod fileio { closefd: AtomicCell::new(true), mode: AtomicCell::new(Mode::empty()), seekable: AtomicCell::new(None), + blksize: AtomicCell::new(8 * 1024), // DEFAULT_BUFFER_SIZE } } } @@ -4507,6 +4670,15 @@ mod fileio { fn init(zelf: PyRef, args: Self::Args, vm: &VirtualMachine) -> PyResult<()> { // TODO: let atomic_flag_works let name = args.name; + // Check if bool is used as file descriptor + if name.class().is(vm.ctx.types.bool_type) { + crate::stdlib::warnings::warn( + vm.ctx.exceptions.runtime_warning, + "bool is used as a file descriptor".to_owned(), + 1, + vm, + )?; + } let arg_fd = if let Some(i) = name.downcast_ref::() { let fd = i.try_to_primitive(vm)?; if fd < 0 { @@ -4557,6 +4729,7 @@ mod fileio { } } }; + let fd_is_own = arg_fd.is_none(); zelf.fd.store(fd); let fd = unsafe { crt_fd::Borrowed::borrow_raw(fd) }; let filename = filename.unwrap_or(OsPathOrFd::Fd(fd)); @@ -4576,12 +4749,25 @@ mod fileio { match fd_fstat { Ok(status) => { if (status.st_mode & libc::S_IFMT) == libc::S_IFDIR { + // If fd was passed by user, don't close it on error + if !fd_is_own { + zelf.fd.store(-1); + } let err = std::io::Error::from_raw_os_error(libc::EISDIR); return Err(OSErrorBuilder::with_filename(&err, filename, vm)); } + // Store st_blksize for _blksize property + if status.st_blksize > 1 { + #[allow(clippy::useless_conversion)] // needed for 32-bit platforms + zelf.blksize.store(i64::from(status.st_blksize)); + } } Err(err) => { if err.raw_os_error() == Some(libc::EBADF) { + // If fd was passed by user, don't close it on error + if !fd_is_own { + zelf.fd.store(-1); + } return Err(OSErrorBuilder::with_filename(&err, filename, vm)); } } @@ -4590,7 +4776,13 @@ mod fileio { #[cfg(windows)] crate::stdlib::msvcrt::setmode_binary(fd); - zelf.as_object().set_attr("name", name, vm)?; + if let Err(e) = zelf.as_object().set_attr("name", name, vm) { + // If fd was passed by user, don't close it on error + if !fd_is_own { + zelf.fd.store(-1); + } + return Err(e); + } if mode.contains(Mode::APPENDING) { let _ = os::lseek(fd, 0, libc::SEEK_END, vm); @@ -4603,17 +4795,18 @@ mod fileio { impl Representable for FileIO { #[inline] fn repr_str(zelf: &Py, vm: &VirtualMachine) -> PyResult { + let type_name = zelf.class().slot_name(); let fd = zelf.fd.load(); if fd < 0 { - return Ok("<_io.FileIO [closed]>".to_owned()); + return Ok(format!("<{type_name} [closed]>")); } let name_repr = repr_file_obj_name(zelf.as_object(), vm)?; let mode = zelf.mode(); let closefd = if zelf.closefd.load() { "True" } else { "False" }; let repr = if let Some(name_repr) = name_repr { - format!("<_io.FileIO name={name_repr} mode='{mode}' closefd={closefd}>") + format!("<{type_name} name={name_repr} mode='{mode}' closefd={closefd}>") } else { - format!("<_io.FileIO fd={fd} mode='{mode}' closefd={closefd}>") + format!("<{type_name} fd={fd} mode='{mode}' closefd={closefd}>") }; Ok(repr) } @@ -4648,6 +4841,11 @@ mod fileio { self.closefd.load() } + #[pygetset(name = "_blksize")] + fn blksize(&self) -> i64 { + self.blksize.load() + } + #[pymethod] fn fileno(&self, vm: &VirtualMachine) -> PyResult { let fd = self.fd.load(); @@ -4664,13 +4862,19 @@ mod fileio { } #[pymethod] - fn readable(&self) -> bool { - self.mode.load().contains(Mode::READABLE) + fn readable(&self, vm: &VirtualMachine) -> PyResult { + if self.fd.load() < 0 { + return Err(io_closed_error(vm)); + } + Ok(self.mode.load().contains(Mode::READABLE)) } #[pymethod] - fn writable(&self) -> bool { - self.mode.load().contains(Mode::WRITABLE) + fn writable(&self, vm: &VirtualMachine) -> PyResult { + if self.fd.load() < 0 { + return Err(io_closed_error(vm)); + } + Ok(self.mode.load().contains(Mode::WRITABLE)) } #[pygetset] @@ -4714,16 +4918,32 @@ mod fileio { let mut handle = zelf.get_fd(vm)?; let bytes = if let Some(read_byte) = read_byte.to_usize() { let mut bytes = vec![0; read_byte]; - let n = handle - .read(&mut bytes) - .map_err(|err| Self::io_error(zelf, err, vm))?; + // Loop on EINTR (PEP 475) + let n = loop { + match handle.read(&mut bytes) { + Ok(n) => break n, + Err(e) if e.raw_os_error() == Some(libc::EINTR) => { + vm.check_signals()?; + continue; + } + Err(e) => return Err(Self::io_error(zelf, e, vm)), + } + }; bytes.truncate(n); bytes } else { let mut bytes = vec![]; - handle - .read_to_end(&mut bytes) - .map_err(|err| Self::io_error(zelf, err, vm))?; + // Loop on EINTR (PEP 475) + loop { + match handle.read_to_end(&mut bytes) { + Ok(_) => break, + Err(e) if e.raw_os_error() == Some(libc::EINTR) => { + vm.check_signals()?; + continue; + } + Err(e) => return Err(Self::io_error(zelf, e, vm)), + } + } bytes }; @@ -4743,9 +4963,17 @@ mod fileio { let mut buf = obj.borrow_buf_mut(); let mut f = handle.take(buf.len() as _); - let ret = f - .read(&mut buf) - .map_err(|err| Self::io_error(zelf, err, vm))?; + // Loop on EINTR (PEP 475) + let ret = loop { + match f.read(&mut buf) { + Ok(n) => break n, + Err(e) if e.raw_os_error() == Some(libc::EINTR) => { + vm.check_signals()?; + continue; + } + Err(e) => return Err(Self::io_error(zelf, e, vm)), + } + }; Ok(ret) } diff --git a/crates/vm/src/stdlib/os.rs b/crates/vm/src/stdlib/os.rs index f20046a1a66..6849ea365df 100644 --- a/crates/vm/src/stdlib/os.rs +++ b/crates/vm/src/stdlib/os.rs @@ -893,6 +893,15 @@ pub(super) mod _os { #[pyarg(any, default)] #[pystruct_sequence(skip)] pub st_ctime_ns: i128, + // Unix-specific attributes + #[cfg(not(windows))] + #[pyarg(any, default)] + #[pystruct_sequence(skip)] + pub st_blksize: i64, + #[cfg(not(windows))] + #[pyarg(any, default)] + #[pystruct_sequence(skip)] + pub st_blocks: i64, #[pyarg(any, default)] #[pystruct_sequence(skip)] pub st_reparse_tag: u32, @@ -945,6 +954,13 @@ pub(super) mod _os { #[cfg(not(windows))] let st_ino = stat.st_ino; + #[cfg(not(windows))] + #[allow(clippy::useless_conversion)] // needed for 32-bit platforms + let st_blksize = i64::from(stat.st_blksize); + #[cfg(not(windows))] + #[allow(clippy::useless_conversion)] // needed for 32-bit platforms + let st_blocks = i64::from(stat.st_blocks); + Self { st_mode: vm.ctx.new_pyref(stat.st_mode), st_ino: vm.ctx.new_pyref(st_ino), @@ -962,6 +978,10 @@ pub(super) mod _os { st_atime_ns: to_ns(atime), st_mtime_ns: to_ns(mtime), st_ctime_ns: to_ns(ctime), + #[cfg(not(windows))] + st_blksize, + #[cfg(not(windows))] + st_blocks, st_reparse_tag, st_file_attributes, }