Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions Lib/test/test_wave.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,30 @@
import wave


class _ReadSizeRecorder(io.BytesIO):
# A seekable file that remembers the largest size ever passed to read()
# (so a test can check that wave does not request far more data than the
# file actually holds, which on a real file would pre-allocate it), and
# that rejects seeks to offsets overflowing a C ssize_t the way a 32-bit
# platform such as WASI does (so a test can check that wave never seeks
# to an untrusted chunk size).
_SSIZE_MAX = (1 << 31) - 1

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.max_read_size = 0

def read(self, size=-1):
if size is not None and size >= 0:
self.max_read_size = max(self.max_read_size, size)
return super().read(size)

def seek(self, pos, whence=0):
if abs(pos) > self._SSIZE_MAX:
raise OverflowError("Python int too large to convert to C ssize_t")
return super().seek(pos, whence)


class WaveTest(audiotests.AudioWriteTests,
audiotests.AudioTestsWithSourceFile):
module = wave
Expand Down Expand Up @@ -333,6 +357,25 @@ def test_read_wrong_sample_width(self):
with self.assertRaisesRegex(wave.Error, 'bad sample width'):
wave.open(io.BytesIO(b))

def test_read_data_chunk_size_larger_than_file(self):
    # gh-151308: the size field of a data chunk header is untrusted and
    # may advertise far more data than the file really holds.
    # readframes() must not pass that claimed size down to the file
    # (which, on a real file, would pre-allocate it); on a seekable file
    # the request is limited to the bytes that are actually there.
    bogus_size = struct.pack('<L', 0xFFFFFFFF)  # claims ~4 GiB
    payload = b'\x00' * 10
    fmt_body = struct.pack('<LHHLLHH', 16, 1, 1, 11025, 11025, 1, 8)
    raw = b''.join([
        b'RIFF', bogus_size, b'WAVE',
        b'fmt ', fmt_body,
        b'data', bogus_size,
        payload,
    ])
    # The recorder raises OverflowError on a huge seek offset as well,
    # which covers the 32-bit (e.g. WASI) code path in the same test.
    recorder = _ReadSizeRecorder(raw)
    with wave.open(recorder, 'rb') as reader:
        frames = reader.readframes(reader.getnframes())
    self.assertEqual(frames, payload)
    # No read() on the underlying file may have asked for the bogus
    # ~4 GiB size.
    self.assertLessEqual(recorder.max_read_size, len(raw))

def test_open_in_write_raises(self):
# gh-136523: Wave_write.__del__ should not throw
with support.catch_unraisable_exception() as cm:
Expand Down
24 changes: 24 additions & 0 deletions Lib/wave.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,30 @@ def read(self, size=-1):
size = self.chunksize - self.size_read
if size > self.chunksize - self.size_read:
size = self.chunksize - self.size_read
# The chunk size comes from the file header and is not trustworthy:
# a truncated or maliciously crafted file can claim a size far larger
# than the data actually present, which would make the read() below
# pre-allocate that many bytes (gh-151308). When the underlying file
# is seekable, clamp the request to the bytes physically available so
# we never allocate more than the file can provide. This leaves the
# data returned for valid files unchanged, since the requested bytes
# are always present. We probe with tell()/seek() rather than trust
# seekable(), since some file objects report being seekable yet raise
# on the actual call; on any failure we fall back to the original
# behaviour. We only probe the raw file object, never a parent
# _Chunk: seeking a _Chunk would seek the raw file to its (untrusted)
# chunk size, which may overflow on 32-bit platforms. Clamping the
# raw read protects the nested chunks too, as they read through it.
if size > 0 and not isinstance(self.file, _Chunk):
try:
here = self.file.tell()
end = self.file.seek(0, 2)
self.file.seek(here, 0)
except (OSError, ValueError):
pass
else:
if isinstance(end, int):
size = min(size, max(0, end - here))
data = self.file.read(size)
self.size_read = self.size_read + len(data)
if self.size_read == self.chunksize and \
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
:meth:`wave.Wave_read.readframes` no longer attempts to pre-allocate a huge
buffer when the data chunk header of a truncated or malformed WAV file claims
a size larger than the file actually contains. When the underlying file is
seekable, reads are now clamped to the number of bytes actually available.
Loading