IMO the whole point of a rec file is to store your data in a single file for faster reading, so one file should be preferred over multiple RecordIO files.
If you do need multiple rec files, you can combine them by creating a custom Dataset class that extends gluon.data.Dataset and implements __getitem__ and __len__.
For example:
class CustomCombinedDataset(gluon.data.Dataset):
    """
    A dataset that accepts several datasets and serves them as one.

    The combined dataset behaves like the concatenation of ``datasets``:
    global index 0 is the first sample of the first dataset, and the
    global index following the last sample of one dataset is the first
    sample of the next.

    Parameters
    ----------
    datasets : sequence
        Indexable sequence of datasets. Each one must support ``len()``
        and integer indexing.
    """

    def __init__(self, datasets):
        self.datasets = datasets
        # One half-open (start, end) range of global indices per dataset,
        # in the same order as `datasets`.
        self.lengths = []
        start = 0
        for d in datasets:
            end = start + len(d)
            self.lengths.append((start, end))
            start = end
        # After the loop `start` is the end of the last range, i.e. the
        # total number of samples across all datasets.
        self.length = start

    def __getitem__(self, idx):
        """Return the sample at global position ``idx``.

        Negative indices count from the end of the combined dataset.

        Raises
        ------
        IndexError
            If ``idx`` is outside the combined dataset.
        """
        original_idx = idx
        if idx < 0:
            idx += self.length
        if not 0 <= idx < self.length:
            raise IndexError(
                "index {} is out of range for a combined dataset of "
                "length {}".format(original_idx, self.length)
            )
        for dataset, (start, end) in zip(self.datasets, self.lengths):
            if idx < end:
                # `start` is this dataset's global offset, so subtracting
                # it yields the local index. (Summing the previous `end`
                # values would overshoot: they are already cumulative.)
                return dataset[idx - start]
        # Not reachable while self.lengths is consistent with self.length;
        # kept so an inconsistent state fails loudly instead of returning None.
        raise IndexError(
            "index {} is out of range for a combined dataset of "
            "length {}".format(original_idx, self.length)
        )

    def __len__(self):
        """Return the total number of samples across all datasets."""
        return self.length
Here, each dataset in the datasets argument is a gluon.data.RecordFileDataset.