Make input dir in DataProcessor required (#18910)

This commit is contained in:
Adrian Wälchli 2023-11-18 12:24:30 -05:00 committed by GitHub
parent b757edc401
commit 61a286d62b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 3 additions and 4 deletions

View File

@@ -731,7 +731,7 @@ class DataTransformRecipe(DataRecipe):
class DataProcessor:
def __init__(
self,
input_dir: Optional[Union[str, Dir]] = None,
input_dir: Union[str, Dir],
output_dir: Optional[Union[str, Dir]] = None,
num_workers: Optional[int] = None,
num_downloaders: Optional[int] = None,
@@ -771,9 +771,8 @@ class DataProcessor:
self.stop_queues: List[Queue] = []
self.reorder_files = reorder_files
if self.input_dir:
# Ensure the input dir is the same across all nodes
self.input_dir = self._broadcast_object(self.input_dir)
# Ensure the input dir is the same across all nodes
self.input_dir = self._broadcast_object(self.input_dir)
if self.output_dir:
# Ensure the output dir is the same across all nodes