# Source code for pytext.utils.file_io

#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import contextlib
import math
import os

# Keep the PathManager instance here for flexibility until PathManager matures:
# if we ever need PathManager-specific hacks, we can add them in this module
# without updating the import everywhere in PyText.
# TODO: @stevenliu use PathManagerFactory after it's released to PyPI
from iopath.common.file_io import HTTPURLHandler, PathManager as PathManagerBase


# Module-level PathManagerBase instance; the helpers in this module open files
# and register handlers through it.
PathManager = PathManagerBase()


def register_http_url_handler():
    """
    Enable reading files from URLs starting with "http://", "https://" or
    "ftp://" by registering an HTTPURLHandler on the module-level PathManager.
    """
    http_handler = HTTPURLHandler()
    # allow_override=True is forwarded as-is; presumably it lets this handler
    # replace one already registered for the same prefixes (confirm in iopath).
    PathManager.register_handler(http_handler, allow_override=True)
def chunk_file(file_path, chunks, work_dir):
    """Split a large file by line into at most ``chunks`` files in ``work_dir``.

    The input is read twice through ``PathManager.open``: once to count its
    lines and once to distribute them. Each output file is named
    ``<work_dir>/<basename(file_path)>.chunk_<k>`` with ``k`` starting at 0 and
    holds up to ``ceil(num_lines / chunks)`` consecutive lines.

    Args:
        file_path: path of the file to split.
        chunks: desired number of output files; must be >= 1. Fewer files are
            produced when the input has fewer lines than ``chunks``.
        work_dir: directory in which the chunk files are written.

    Returns:
        List of the chunk file paths that were written, in order. Empty when
        the input file has no lines.

    Raises:
        ValueError: if ``chunks`` is smaller than 1.
    """
    # Validate before touching the file system: 0 would otherwise surface as a
    # ZeroDivisionError and negative values would yield names like "chunk_-1".
    if chunks < 1:
        raise ValueError(
            "chunks must be a positive integer, got {!r}".format(chunks)
        )

    with PathManager.open(file_path) as fin:
        num_lines = sum(1 for _ in fin)
    if num_lines == 0:
        return []

    chunk_size = math.ceil(num_lines / chunks)
    # Common prefix of every chunk file name; invariant across the loop.
    split_prefix = os.path.join(work_dir, os.path.basename(file_path))
    output_file_paths = []

    fout = None
    try:
        with PathManager.open(file_path) as fin:
            for i, line in enumerate(fin):
                if i % chunk_size == 0:
                    # Close the finished chunk before opening the next one so
                    # only a single output handle is open at any time, instead
                    # of keeping one descriptor per chunk until the very end.
                    if fout is not None:
                        fout.close()
                    file_split = "{}.chunk_{}".format(
                        split_prefix, i // chunk_size
                    )
                    output_file_paths.append(file_split)
                    fout = open(file_split, "w")
                # NOTE(review): strip() removes leading/trailing whitespace of
                # the line, not just the newline; kept to preserve the
                # existing output format.
                fout.write("{}\n".format(line.strip()))
    finally:
        if fout is not None:
            fout.close()
    return output_file_paths