Skip to content

hydro_file

The hydro_file module provides utilities for file I/O operations, including reading and writing various data formats commonly used in hydrological applications.

Core Functions

read_ts_xrdataset

1
2
3
4
5
6
7
def read_ts_xrdataset(
    file_path: str,
    var_name: str = None,
    time_name: str = "time",
    lat_name: str = "lat",
    lon_name: str = "lon"
) -> xr.Dataset

Reads time series data from NetCDF files into xarray Dataset format.

Example:

1
2
3
4
5
import hydroutils as hu

# Read NetCDF file
ds = hu.read_ts_xrdataset('data.nc', var_name='precipitation')
print(f"Dataset shape: {ds.dims}")

write_ts_xrdataset

1
2
3
4
5
6
def write_ts_xrdataset(
    ds: xr.Dataset,
    file_path: str,
    var_name: str = None,
    encoding: dict = None
) -> None

Writes xarray Dataset to NetCDF file.

read_csv

1
def read_csv(file_path: str, **kwargs) -> pd.DataFrame

Reads CSV files with enhanced error handling and encoding detection.

write_csv

1
def write_csv(df: pd.DataFrame, file_path: str, **kwargs) -> None

Writes DataFrame to CSV with proper encoding and error handling.

JSON Functions

serialize_json

1
def serialize_json(my_dict: dict, my_file: str, encoding: str = "utf-8", ensure_ascii: bool = True) -> None

Saves a dictionary to a JSON file.

unserialize_json

1
def unserialize_json(my_file: str) -> dict

Loads a JSON file into a dictionary.

serialize_json_np

1
def serialize_json_np(my_dict: dict, my_file: str) -> None

Saves a dictionary containing NumPy arrays to a JSON file.

Pickle Functions

serialize_pickle

1
def serialize_pickle(my_object: object, my_file: str) -> None

Saves an object to a pickle file.

unserialize_pickle

1
def unserialize_pickle(my_file: str) -> object

Loads an object from a pickle file.

NumPy Array Functions

serialize_numpy

1
def serialize_numpy(my_array: np.ndarray, my_file: str) -> None

Saves a NumPy array to a .npy file.

unserialize_numpy

1
def unserialize_numpy(my_file: str) -> np.ndarray

Loads a NumPy array from a .npy file.

File Management Functions

get_lastest_file_in_a_dir

1
def get_lastest_file_in_a_dir(dir_path: str) -> str

Gets the latest .pth file in a directory, ranked by the timestamp returned by os.path.getctime.

get_cache_dir

1
def get_cache_dir(app_name: str = "hydro") -> str

Gets the appropriate cache directory for the current platform.

Classes

NumpyArrayEncoder

1
class NumpyArrayEncoder(json.JSONEncoder)

JSON encoder that handles NumPy arrays and scalars.

API Reference

Author: Wenyu Ouyang; Date: 2024-08-15 10:08:59; LastEditTime: 2025-02-02 06:27:44; LastEditors: Wenyu Ouyang; Description: some methods for file operations; FilePath: \hydroutils\hydroutils\hydro_file.py; Copyright (c) 2023-2024 Wenyu Ouyang. All rights reserved.

NumpyArrayEncoder

Bases: JSONEncoder

JSON encoder that handles NumPy arrays and scalar types.

This encoder converts NumPy arrays and scalar types to Python native types that can be serialized by the standard JSON encoder.

Source code in hydroutils/hydro_file.py
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
class NumpyArrayEncoder(json.JSONEncoder):
    """JSON encoder that handles NumPy arrays and scalar types.

    This encoder converts NumPy arrays and scalar types (integers, floats and
    booleans) to Python native types that can be serialized by the standard
    JSON encoder.
    """

    def default(self, obj):
        """Convert NumPy types to JSON serializable objects.

        Args:
            obj: Object to encode.

        Returns:
            JSON serializable object.

        Raises:
            TypeError: If ``obj`` is not a supported NumPy type (raised by the
                base ``JSONEncoder.default``).
        """
        if isinstance(obj, np.ndarray):
            return self.convert_ndarray(obj)
        # np.bool_ is neither np.integer nor np.floating, so it has to be
        # listed explicitly or boolean scalars would be rejected.
        if isinstance(obj, (np.integer, np.floating, np.bool_)):
            return obj.item()
        return json.JSONEncoder.default(self, obj)

    def convert_ndarray(self, array):
        """Convert a NumPy array to a nested list of native Python values.

        Args:
            array (np.ndarray): NumPy array to convert.

        Returns:
            list or scalar: Python native type equivalent of the array.
        """
        if array.ndim == 0:
            return array.item()
        if array.dtype != object:
            # tolist() converts every element (including bools) to the
            # matching built-in Python type in one pass.
            return array.tolist()
        # Object arrays may hold nested ndarrays; convert those recursively
        # and leave any other element untouched.
        return [
            self.convert_ndarray(element) if isinstance(element, np.ndarray) else element
            for element in array
        ]

convert_ndarray(array)

Convert a NumPy array to a nested list.

Parameters:

Name Type Description Default
array ndarray

NumPy array to convert.

required

Returns:

Type Description

list or scalar: Python native type equivalent of the array.

Source code in hydroutils/hydro_file.py
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
def convert_ndarray(self, array):
    """Convert a NumPy array to a nested list of native Python values.

    Args:
        array (np.ndarray): NumPy array to convert.

    Returns:
        list or scalar: Python native type equivalent of the array.
    """
    if array.ndim == 0:
        return array.item()
    if array.dtype != object:
        # tolist() converts every element (including bools) to the matching
        # built-in Python type in one pass.
        return array.tolist()
    # Object arrays may hold nested ndarrays; convert those recursively and
    # leave any other element untouched.
    return [
        self.convert_ndarray(element) if isinstance(element, np.ndarray) else element
        for element in array
    ]

default(obj)

Convert NumPy types to JSON serializable objects.

Parameters:

Name Type Description Default
obj

Object to encode.

required

Returns:

Type Description

JSON serializable object.

Source code in hydroutils/hydro_file.py
294
295
296
297
298
299
300
301
302
303
304
305
306
307
def default(self, obj):
    """Convert NumPy types to JSON serializable objects.

    Args:
        obj: Object to encode.

    Returns:
        JSON serializable object.

    Raises:
        TypeError: If ``obj`` is not a supported NumPy type (raised by the
            base ``JSONEncoder.default``).
    """
    if isinstance(obj, np.ndarray):
        return self.convert_ndarray(obj)
    # np.bool_ is neither np.integer nor np.floating, so it has to be listed
    # explicitly or boolean scalars would be rejected.
    if isinstance(obj, (np.integer, np.floating, np.bool_)):
        return obj.item()
    return json.JSONEncoder.default(self, obj)

download_a_file_from_google_drive(drive, dir_id, download_dir)

Download files from Google Drive.

Parameters:

Name Type Description Default
drive

Google Drive API instance.

required
dir_id str

ID of the Google Drive directory.

required
download_dir str

Local directory to save downloaded files.

required

Returns:

Type Description

None

Note

Handles both files and folders recursively. Skips already downloaded files.

Source code in hydroutils/hydro_file.py
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
def download_a_file_from_google_drive(drive, dir_id, download_dir):
    """Mirror the contents of a Google Drive folder into a local directory.

    Args:
        drive: Google Drive client exposing ``ListFile`` and ``CreateFile``
            (presumably a PyDrive ``GoogleDrive`` object -- TODO confirm).
        dir_id (str): ID of the Google Drive directory to list.
        download_dir (str): Local directory that receives the downloads.

    Returns:
        None

    Note:
        Sub-folders are handled by recursing into them, creating the matching
        local sub-directory first. A file whose local path already exists is
        skipped.
    """
    folder_mime = "application/vnd.google-apps.folder"
    query = {"q": f"'{dir_id}' in parents and trashed=false"}
    for entry in drive.ListFile(query).GetList():
        print(f'title: {entry["title"]}, id: {entry["id"]}')
        remote = drive.CreateFile({"id": entry["id"]})
        print(f'mimetype is {remote["mimeType"]}')
        is_folder = remote["mimeType"] == folder_mime
        local_path = os.path.join(download_dir, remote["title"])

        if is_folder:
            if not os.path.isdir(local_path):
                os.makedirs(local_path)
            download_a_file_from_google_drive(drive, remote["id"], local_path)
            continue

        # Regular file: skip it when a local copy is already present.
        if os.path.isfile(local_path):
            print("file has been downloaded")
            continue
        remote.GetContentFile(local_path)
        print("Downloading file finished")

download_excel(data_url, temp_file)

Download an Excel file from URL.

Parameters:

Name Type Description Default
data_url str

URL of the Excel file to download.

required
temp_file str

Path where the Excel file will be saved.

required

Returns:

Type Description

None

Note

Only downloads if the file doesn't already exist locally.

Source code in hydroutils/hydro_file.py
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
def download_excel(data_url, temp_file):
    """Fetch an Excel file from a URL unless it is already on disk.

    Args:
        data_url (str): URL of the Excel file to download.
        temp_file (str): Path where the Excel file will be saved.

    Returns:
        None

    Note:
        Nothing is downloaded when ``temp_file`` already exists as a file.
    """
    already_present = os.path.isfile(temp_file)
    if already_present:
        return
    urllib.request.urlretrieve(data_url, temp_file)

download_one_zip(data_url, data_dir)

Download one zip file from URL and extract it.

Parameters:

Name Type Description Default
data_url str

The URL of the file to download.

required
data_dir str

Directory where the file will be downloaded and extracted.

required

Returns:

Type Description

None

Note

The function will create the target directory if it doesn't exist.

Source code in hydroutils/hydro_file.py
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
def download_one_zip(data_url, data_dir):
    """Download one zip file from URL and extract it.

    Args:
        data_url (str): The URL of the file to download.
        data_dir (str): Directory where the file will be downloaded and extracted.

    Returns:
        None

    Raises:
        requests.HTTPError: If the server answers with an error status.

    Note:
        The zip path and extraction directory come from
        ``zip_file_name_from_url``. The extraction directory is created if it
        doesn't exist. Nothing is downloaded when ``is_there_file`` reports
        that the archive is already present.
    """
    zipfile_path, unzip_dir = zip_file_name_from_url(data_url, data_dir)
    if is_there_file(zipfile_path, unzip_dir):
        return
    os.makedirs(unzip_dir, exist_ok=True)
    # Stream the body so large archives are never held in memory at once; the
    # context manager releases the connection when the download ends.
    with requests.get(data_url, stream=True) as r:
        # Fail before writing, so an HTML error page is never saved as the zip.
        r.raise_for_status()
        with open(zipfile_path, "wb") as py_file:
            for chunk in r.iter_content(chunk_size=1024):  # 1024 bytes
                if chunk:
                    py_file.write(chunk)
    unzip_nested_zip(zipfile_path, unzip_dir)

download_small_file(data_url, temp_file)

Download a small file from URL.

Parameters:

Name Type Description Default
data_url str

URL of the file to download.

required
temp_file str

Path where the downloaded file will be saved.

required

Returns:

Type Description

None

Note

Uses requests library for downloading.

Source code in hydroutils/hydro_file.py
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
def download_small_file(data_url, temp_file):
    """Download a small file from URL.

    Args:
        data_url (str): URL of the file to download.
        temp_file (str): Path where the downloaded file will be saved.

    Returns:
        None

    Raises:
        requests.HTTPError: If the server answers with an error status.

    Note:
        Uses requests library for downloading. The whole response body is
        loaded into memory before being written.
    """
    r = requests.get(data_url)
    # Fail loudly rather than saving an error page as if it were the file.
    r.raise_for_status()
    # Write the raw bytes: decoding to text and re-encoding with the platform
    # default encoding can corrupt binary or non-ASCII payloads.
    with open(temp_file, "wb") as f:
        f.write(r.content)

download_small_zip(data_url, data_dir)

Download a small zip file and extract it.

Parameters:

Name Type Description Default
data_url str

URL of the zip file to download.

required
data_dir str

Directory where the file will be downloaded and extracted.

required

Returns:

Type Description

None

Note

Uses urllib.request for downloading small files.

Source code in hydroutils/hydro_file.py
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
def download_small_zip(data_url, data_dir):
    """Download a small zip file and extract it.

    Args:
        data_url (str): URL of the zip file to download.
        data_dir (str): Directory where the file will be downloaded and extracted.

    Returns:
        None

    Note:
        Uses urllib.request for downloading small files. The zip path and
        extraction directory come from ``zip_file_name_from_url``. Nothing is
        downloaded when ``is_there_file`` reports that the archive is already
        present.
    """
    zipfile_path, unzip_dir = zip_file_name_from_url(data_url, data_dir)
    if is_there_file(zipfile_path, unzip_dir):
        return
    # makedirs (not mkdir) so missing parent directories do not abort the
    # download; exist_ok avoids a race with a concurrent creator.
    os.makedirs(unzip_dir, exist_ok=True)
    zipfile_path, _ = urllib.request.urlretrieve(data_url, zipfile_path)
    unzip_nested_zip(zipfile_path, unzip_dir)

download_zip_files(urls, the_dir)

Download multiple files from multiple URLs.

Parameters:

Name Type Description Default
urls list

List of URLs to download files from.

required
the_dir Path

Directory where downloaded files will be stored.

required

Returns:

Type Description

None

Note

Uses a temporary directory for caching during download.

Source code in hydroutils/hydro_file.py
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
def download_zip_files(urls, the_dir: Path):
    """Download multiple files from multiple URLs.

    Args:
        urls (list): List of URLs to download files from.
        the_dir (Path): Directory where downloaded files will be stored.
            A plain string path is accepted as well.

    Returns:
        None

    Note:
        Uses a temporary directory for the retriever's cache file during
        download. Each file is saved under the last path segment of its URL.
    """
    the_dir = Path(the_dir)
    with tempfile.TemporaryDirectory() as tmpdir:
        # TemporaryDirectory yields a plain str, so wrap it before joinpath.
        cache_names = Path(tmpdir).joinpath(f"{the_dir.stem}.sqlite")
        r = ar.retrieve(urls, "binary", cache_name=cache_names, ssl=False)
        files = [the_dir.joinpath(url.split("/")[-1]) for url in urls]
        # NOTE(review): assumes `r` holds one bytes-like payload per URL, in
        # the same order as `urls` -- confirm against the retriever's API.
        for target, payload in zip(files, r):
            target.write_bytes(io.BytesIO(payload).getbuffer())

get_cache_dir(app_name='hydro')

Get the appropriate cache directory for the current operating system.

Parameters:

Name Type Description Default
app_name str

Name of the application. Defaults to "hydro".

'hydro'

Returns:

Name Type Description
str

Path to the cache directory.

Note

Creates the directory if it doesn't exist. Follows OS-specific conventions: Windows: ~/AppData/Local/app_name/Cache (the usual location of %LOCALAPPDATA%); macOS: ~/Library/Caches/app_name; Linux and other systems: ~/.cache/app_name.

Source code in hydroutils/hydro_file.py
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
def get_cache_dir(app_name="hydro"):
    """Get the appropriate cache directory for the current operating system.

    Args:
        app_name (str, optional): Name of the application. Defaults to "hydro".

    Returns:
        str: Path to the cache directory.

    Note:
        Creates the directory if it doesn't exist.
        The path is built under the user's home directory:
        - Windows: ~/AppData/Local/app_name/Cache
        - macOS: ~/Library/Caches/app_name
        - Any other system: ~/.cache/app_name
    """
    home = os.path.expanduser("~")
    system = platform.system()

    if system == "Windows":
        cache_dir = os.path.join(home, "AppData", "Local", app_name, "Cache")
    elif system == "Darwin":
        cache_dir = os.path.join(home, "Library", "Caches", app_name)
    else:
        cache_dir = os.path.join(home, ".cache", app_name)

    # exist_ok avoids the check-then-create race when two processes ask for
    # the cache directory at the same time.
    os.makedirs(cache_dir, exist_ok=True)

    return cache_dir

get_lastest_file_in_a_dir(dir_path)

Get the latest .pth file in a directory.

Parameters

dir_path : str the directory

Returns

str the path of the latest .pth (weight) file

Source code in hydroutils/hydro_file.py
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
def get_lastest_file_in_a_dir(dir_path):
    """Pick the latest ``*.pth`` file found directly inside a directory.

    Parameters
    ----------
    dir_path : str
        the directory to scan (sub-directories are not searched)

    Returns
    -------
    str
        the path of the ``*.pth`` file selected by ``get_latest_file_in_a_lst``
    """
    candidates = []
    for entry in os.listdir(dir_path):
        if not fnmatch.fnmatch(entry, "*.pth"):
            continue
        candidates.append(os.path.join(dir_path, entry))
    return get_latest_file_in_a_lst(candidates)

get_latest_file_in_a_lst(lst)

Get the most recently modified file from a list of files.

Parameters:

Name Type Description Default
lst list

List of file paths.

required

Returns:

Name Type Description
str

Path of the most recently modified file.

Source code in hydroutils/hydro_file.py
421
422
423
424
425
426
427
428
429
430
431
432
def get_latest_file_in_a_lst(lst):
    """Return the file with the largest ``os.path.getctime`` timestamp.

    Args:
        lst (list): List of file paths.

    Returns:
        str: The entry of ``lst`` whose ``os.path.getctime`` value is largest.

    Note:
        The ranking uses ``os.path.getctime``; what that timestamp represents
        is platform-dependent.
    """
    timestamps = list(map(os.path.getctime, lst))
    # argsort orders ascending, so the final index points at the newest file.
    newest_pos = np.argsort(timestamps)[-1]
    return lst[newest_pos]

is_there_file(zipfile_path, unzip_dir)

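Check whether the zip file already exists; if it does but the unzip directory is missing, the archive is extracted first.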

Source code in hydroutils/hydro_file.py
 95
 96
 97
 98
 99
100
101
def is_there_file(zipfile_path, unzip_dir):
    """Report whether the zip file already exists, extracting it if needed.

    Args:
        zipfile_path (str): Path of the zip archive to look for.
        unzip_dir (str): Directory the archive is expected to be extracted to.

    Returns:
        bool: True when ``zipfile_path`` exists (the archive is extracted into
        ``unzip_dir`` first if that directory is missing), False otherwise.
    """
    if not os.path.isfile(zipfile_path):
        # Explicit False instead of the implicit None callers used to get.
        return False
    if not os.path.isdir(unzip_dir):
        unzip_nested_zip(zipfile_path, unzip_dir)
    return True

serialize_json(my_dict, my_file, encoding='utf-8', ensure_ascii=True)

Serialize a dictionary to a JSON file.

Parameters:

Name Type Description Default
my_dict dict

Dictionary to serialize.

required
my_file str

Path to the output JSON file.

required
encoding str

File encoding. Defaults to "utf-8".

'utf-8'
ensure_ascii bool

If True, ensure ASCII output. Defaults to True.

True

Returns:

Type Description

None

Source code in hydroutils/hydro_file.py
243
244
245
246
247
248
249
250
251
252
253
254
255
256
def serialize_json(my_dict, my_file, encoding="utf-8", ensure_ascii=True):
    """Write a dictionary to a JSON file, indented by four spaces.

    Args:
        my_dict (dict): Dictionary to serialize.
        my_file (str): Path to the output JSON file.
        encoding (str, optional): Encoding used to open the file. Defaults to "utf-8".
        ensure_ascii (bool, optional): Passed to ``json.dump``; when True,
            non-ASCII characters are escaped. Defaults to True.

    Returns:
        None
    """
    dump_options = {"ensure_ascii": ensure_ascii, "indent": 4}
    with open(my_file, "w", encoding=encoding) as sink:
        json.dump(my_dict, sink, **dump_options)

serialize_json_np(my_dict, my_file)

Serialize a dictionary containing NumPy arrays to a JSON file.

Parameters:

Name Type Description Default
my_dict dict

Dictionary containing NumPy arrays to serialize.

required
my_file str

Path to the output JSON file.

required

Returns:

Type Description

None

Note

Uses NumpyArrayEncoder to handle NumPy types.

Source code in hydroutils/hydro_file.py
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
def serialize_json_np(my_dict, my_file):
    """Write a dictionary that may contain NumPy values to a JSON file.

    Args:
        my_dict (dict): Dictionary containing NumPy arrays to serialize.
        my_file (str): Path to the output JSON file.

    Returns:
        None

    Note:
        NumPy values are converted by passing NumpyArrayEncoder as the
        encoder class to ``json.dump``.
    """
    encoder_cls = NumpyArrayEncoder
    with open(my_file, "w") as sink:
        json.dump(my_dict, sink, cls=encoder_cls)

serialize_numpy(my_array, my_file)

Save a NumPy array to a binary file.

Parameters:

Name Type Description Default
my_array ndarray

NumPy array to save.

required
my_file str

Path to the output file.

required

Returns:

Type Description

None

Source code in hydroutils/hydro_file.py
375
376
377
378
379
380
381
382
383
384
385
def serialize_numpy(my_array, my_file):
    """Store a NumPy array on disk with ``np.save``.

    Args:
        my_array (np.ndarray): NumPy array to save.
        my_file (str): Path to the output file.

    Returns:
        None
    """
    np.save(file=my_file, arr=my_array)

serialize_pickle(my_object, my_file)

Serialize an object to a pickle file.

Parameters:

Name Type Description Default
my_object object

Python object to serialize.

required
my_file str

Path to the output pickle file.

required

Returns:

Type Description

None

Source code in hydroutils/hydro_file.py
347
348
349
350
351
352
353
354
355
356
357
358
def serialize_pickle(my_object, my_file):
    """Pickle a Python object into a file.

    Args:
        my_object (object): Python object to serialize.
        my_file (str): Path to the output pickle file.

    Returns:
        None
    """
    with open(my_file, mode="wb") as sink:
        pickle.dump(my_object, file=sink)

unserialize_json(my_file)

Load a JSON file into a Python object.

Parameters:

Name Type Description Default
my_file str

Path to the JSON file to read.

required

Returns:

Name Type Description
object

Python object (typically dict or list) loaded from the JSON file.

Source code in hydroutils/hydro_file.py
273
274
275
276
277
278
279
280
281
282
283
284
def unserialize_json(my_file, encoding="utf-8"):
    """Load a JSON file into a Python object.

    Args:
        my_file (str): Path to the JSON file to read.
        encoding (str, optional): Encoding used to open the file. Defaults to
            "utf-8", matching the default used by ``serialize_json``.

    Returns:
        object: Python object (typically dict or list) loaded from the JSON file.
    """
    # An explicit encoding keeps reads independent of the platform locale, so
    # files written as UTF-8 load the same way on every system.
    with open(my_file, "r", encoding=encoding) as fp:
        return json.load(fp)

unserialize_json_ordered(my_file)

Load a JSON file into an OrderedDict, preserving key order.

Parameters:

Name Type Description Default
my_file str

Path to the JSON file to read.

required

Returns:

Name Type Description
OrderedDict

Dictionary with preserved key order from the JSON file.

Source code in hydroutils/hydro_file.py
259
260
261
262
263
264
265
266
267
268
269
270
def unserialize_json_ordered(my_file, encoding="utf-8"):
    """Load a JSON file into an OrderedDict, preserving key order.

    Args:
        my_file (str): Path to the JSON file to read.
        encoding (str, optional): Encoding used to open the file. Defaults to
            "utf-8", matching the default used by ``serialize_json``.

    Returns:
        OrderedDict: Dictionary with preserved key order from the JSON file.
        Nested JSON objects are loaded as OrderedDict as well.
    """
    # An explicit encoding keeps reads independent of the platform locale.
    with open(my_file, "r", encoding=encoding) as fp:
        return json.load(fp, object_pairs_hook=OrderedDict)

unserialize_numpy(my_file)

Load a NumPy array from a binary file.

Parameters:

Name Type Description Default
my_file str

Path to the NumPy array file.

required

Returns:

Type Description

np.ndarray: NumPy array loaded from the file.

Source code in hydroutils/hydro_file.py
388
389
390
391
392
393
394
395
396
397
def unserialize_numpy(my_file):
    """Read back a NumPy array from a file using ``np.load``.

    Args:
        my_file (str): Path to the NumPy array file.

    Returns:
        np.ndarray: NumPy array loaded from the file.
    """
    loaded = np.load(my_file)
    return loaded

unserialize_pickle(my_file)

Load an object from a pickle file.

Parameters:

Name Type Description Default
my_file str

Path to the pickle file to read.

required

Returns:

Name Type Description
object

Python object loaded from the pickle file.

Source code in hydroutils/hydro_file.py
361
362
363
364
365
366
367
368
369
370
371
372
def unserialize_pickle(my_file):
    """Restore a Python object from a pickle file.

    Args:
        my_file (str): Path to the pickle file to read.

    Returns:
        object: Python object loaded from the pickle file.
    """
    # NOTE(review): pickle.load can execute arbitrary code; only feed this
    # function files from a trusted source.
    with open(my_file, mode="rb") as source:
        return pickle.load(source)

unzip_file(data_zip, path_unzip)

Extract a zip file to the specified directory.

Parameters:

Name Type Description Default
data_zip str

Path to the zip file to extract.

required
path_unzip str

Directory where the contents will be extracted.

required

Returns:

Type Description

None

Source code in hydroutils/hydro_file.py
45
46
47
48
49
50
51
52
53
54
55
56
def unzip_file(data_zip, path_unzip):
    """Unpack every member of a zip archive into a directory.

    Args:
        data_zip (str): Path to the zip file to extract.
        path_unzip (str): Directory where the contents will be extracted.

    Returns:
        None
    """
    with zipfile.ZipFile(data_zip, mode="r") as archive:
        archive.extractall(path=path_unzip)

unzip_nested_zip(dataset_zip, path_unzip)

Extract a zip file, including any nested zip files. If a file's name is "xxx_", the "extractall" function in the "zipfile" library seems to throw an OSError, so please check the unzipped files manually when this occurs.

Parameters

dataset_zip: the zip file to extract. path_unzip: the directory where it is unzipped.

Source code in hydroutils/hydro_file.py
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
def unzip_nested_zip(dataset_zip, path_unzip):
    """
    Extract a zip archive and, recursively, every zip archive found inside it.

    An OSError raised while extracting is logged as a warning rather than
    propagated (the original author notes this seems to happen for member
    names like "xxx_"), so check the unzipped files manually when the warning
    appears.

    Parameters
    ----------
    dataset_zip: the zip file
    path_unzip: where it is unzipped
    """
    with zipfile.ZipFile(dataset_zip, "r") as archive:
        try:
            archive.extractall(path=path_unzip)
        except OSError as err:
            for message in (
                "Please check the unzipped files manually. There may be some missed important files.",
                f"The directory is: {path_unzip}",
                f"Error message: {err}",
            ):
                logging.warning(message)

    # Every *.zip under the extraction directory is unpacked into a sibling
    # folder named after the archive without its ".zip" suffix.
    for current_dir, _subdirs, names in os.walk(path_unzip):
        for name in names:
            if not re.search(r"\.zip$", name):
                continue
            nested_zip = os.path.join(current_dir, name)
            nested_target = os.path.join(current_dir, name[:-4])
            unzip_nested_zip(nested_zip, nested_target)

zip_extract(the_dir)

Extract the downloaded zip files in the specified directory.

Parameters:

Name Type Description Default
the_dir Path

The directory containing zip files to extract.

required

Returns:

Type Description
None

None

Source code in hydroutils/hydro_file.py
30
31
32
33
34
35
36
37
38
39
40
41
42
def zip_extract(the_dir) -> None:
    """Unpack each ``*.zip`` file found directly inside a directory.

    Args:
        the_dir (Path): The directory containing zip files to extract.

    Returns:
        None
    """
    for archive_path in the_dir.glob("*.zip"):
        # Each archive gets its own folder, named after the file's stem.
        destination = the_dir.joinpath(archive_path.stem)
        with zipfile.ZipFile(archive_path) as archive:
            archive.extractall(destination)