2023-11-07 09:46:13 +00:00
|
|
|
import os
|
|
|
|
import shutil
|
|
|
|
from unittest import mock
|
|
|
|
|
|
|
|
from lightning.data.streaming import reader
|
|
|
|
from lightning.data.streaming.cache import Cache
|
2023-11-16 23:06:58 +00:00
|
|
|
from lightning.data.streaming.config import ChunkedIndex
|
2023-11-07 09:46:13 +00:00
|
|
|
from lightning_cloud.resolver import Dir
|
|
|
|
|
|
|
|
|
|
|
|
def test_reader_chunk_removal(tmpdir, monkeypatch):
    """Read a 25-item cache back under two mocked disk sizes and check the local directory contents.

    The cache is written locally with ``chunk_size=2``, copied to ``remote_dir`` and the
    local directory is emptied. Every item is then read back twice, each time with
    ``reader.shutil`` replaced by a mock reporting a different ``disk_usage().total``,
    and the number of entries in ``cache_dir`` is asserted.

    NOTE(review): presumably the reader uses ``shutil.disk_usage`` to decide when
    to delete local chunks -- confirm against ``lightning.data.streaming.reader``.
    """
    num_items = 25
    cache_dir = os.path.join(tmpdir, "cache_dir")
    remote_dir = os.path.join(tmpdir, "remote_dir")

    def _reset_cache_dir():
        # Wipe the local directory and recreate it empty.
        shutil.rmtree(cache_dir)
        os.makedirs(cache_dir, exist_ok=True)

    def _patch_reader_disk_total(total):
        # Swap the `shutil` name inside the reader module for a mock whose
        # `disk_usage()` result reports the given `total`.
        fake_shutil = mock.MagicMock()
        fake_shutil.disk_usage.return_value.total = total
        monkeypatch.setattr(reader, "shutil", fake_shutil)

    def _read(position):
        # Fetch one item through a ChunkedIndex; only the final item is flagged as last.
        chunk_index = cache._get_chunk_index_from_index(position)
        return cache[ChunkedIndex(position, chunk_index, last_index=position == 24)]

    os.makedirs(cache_dir, exist_ok=True)
    cache = Cache(input_dir=Dir(path=cache_dir, url=remote_dir), chunk_size=2, max_cache_size=53687091200)

    for value in range(num_items):
        cache[value] = value

    cache.done()
    cache.merge()

    # First pass: the mocked disk reports a total of 1230.
    _patch_reader_disk_total(1230)

    # The test module's own `shutil` is the real one: move the data to the
    # "remote" location and start from an empty local directory.
    shutil.copytree(cache_dir, remote_dir)
    _reset_cache_dir()

    for position in range(num_items):
        assert _read(position) == position

    assert len(os.listdir(cache_dir)) == 14

    # Second pass: empty local directory again, mocked disk total of 536870912000.
    _reset_cache_dir()
    _patch_reader_disk_total(536870912000)

    observed = []
    for position in range(num_items):
        # Record the directory size *before* the read of this position.
        observed.append([position, len(os.listdir(cache_dir))])
        assert _read(position) == position

    # Positions 0..20 see 0, 1, 1, 2, 2, ..., 10, 10 entries; the count then drops for the tail.
    growing_phase = [[position, (position + 1) // 2] for position in range(21)]
    tail_phase = [[21, 2], [22, 2], [23, 3], [24, 3]]
    assert observed == growing_phase + tail_phase

    assert len(os.listdir(cache_dir)) in [3, 4]
|