Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
gh-129005: Align FileIO.readall allocation
Both `_io.FileIO.readall` and `_pyio.FileIO.readall` now use a pre-allocated
buffer of length `bufsize`, fill it using `readinto`, and have matching
"expand buffer" logic.

On my machine this reduces the runtime of:

`./python -m test -M8g -uall test_largefile -m test_large_read -v`
from ~3.7 seconds to ~3.4 seconds.
  • Loading branch information
cmaloney committed Jan 29, 2025
commit 96078b08952d0fd992dfd6c79b8dbd044665c469
27 changes: 18 additions & 9 deletions Lib/_pyio.py
Original file line number Diff line number Diff line change
Expand Up @@ -1674,22 +1674,31 @@ def readall(self):
except OSError:
pass

result = bytearray()
result = bytearray(bufsize)
bytes_read = 0
while True:
if len(result) >= bufsize:
bufsize = len(result)
bufsize += max(bufsize, DEFAULT_BUFFER_SIZE)
n = bufsize - len(result)
if bytes_read >= bufsize:
# Parallels _io/fileio.c new_buffersize
if bufsize > 65536:
addend = bufsize >> 3
else:
addend = bufsize + 256
if addend < DEFAULT_BUFFER_SIZE:
addend = DEFAULT_BUFFER_SIZE
bufsize += addend
result[bytes_read:bufsize] = b'\0'
assert bufsize - bytes_read > 0, "Should always try and read at least one byte"
try:
chunk = os.read(self._fd, n)
n = os.readinto(self._fd, memoryview(result)[bytes_read:])
except BlockingIOError:
if result:
if bytes_read > 0:
break
return None
if not chunk: # reached the end of the file
if n == 0: # reached the end of the file
break
result += chunk
bytes_read += n

del result[bytes_read:]
return bytes(result)

def readinto(self, buffer):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
``_pyio.FileIO.readall`` now allocates, resizes, and fills a data buffer using the same algorithm ``_io.FileIO.readall`` uses.
Comment thread
vstinner marked this conversation as resolved.
Outdated