Remove image decoder memory padding from examples (#5484)
* Remove memory padding from examples
* Patch the MXNet script to load nvImageCodec first to avoid a libjpeg symbol clash

Signed-off-by: Joaquin Anton <[email protected]>
jantonguirao committed May 28, 2024
1 parent 78545d4 commit 1ea44da
Showing 6 changed files with 12 additions and 21 deletions.
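
For context, a minimal sketch (not part of the commit) of what the decoding stage in these examples looks like after the change: the explicit device_memory_padding / host_memory_padding arguments are simply dropped, while the hardware-decoder preallocation hints remain. The pipeline name, the reader, the resize step, and the usage values below are illustrative assumptions; the decoder arguments mirror the ones kept in the diffs that follow.

import nvidia.dali.fn as fn
import nvidia.dali.types as types
from nvidia.dali import pipeline_def

@pipeline_def
def training_pipe_sketch(data_dir):
    # Reader and resize are placeholders; the decoder call is the relevant part.
    jpegs, labels = fn.readers.file(file_root=data_dir, random_shuffle=True, name="Reader")
    images = fn.decoders.image_random_crop(
        jpegs,
        device="mixed",                   # GPU-accelerated decoding
        output_type=types.RGB,
        preallocate_width_hint=5980,      # HW-decoder hints kept by the examples
        preallocate_height_hint=6430,
        random_aspect_ratio=[0.8, 1.25],  # values taken from the examples
        random_area=[0.08, 1.0])
    images = fn.resize(images, resize_x=224, resize_y=224)
    return images, labels

# e.g. pipe = training_pipe_sketch(data_dir="/data/imagenet/train",
#                                  batch_size=256, num_threads=4, device_id=0)
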
5 changes: 0 additions & 5 deletions docs/examples/use_cases/mxnet/mxnet-resnet50.ipynb
@@ -64,17 +64,12 @@
" pad_last_batch=is_training, name=\"Reader\")\n",
" dali_device = 'cpu' if dali_cpu else 'gpu'\n",
" decoder_device = 'cpu' if dali_cpu else 'mixed'\n",
" # ask nvJPEG to preallocate memory for the biggest sample in ImageNet for CPU and GPU to avoid reallocations in runtime\n",
" device_memory_padding = 211025920 if decoder_device == 'mixed' else 0\n",
" host_memory_padding = 140544512 if decoder_device == 'mixed' else 0\n",
" # ask HW NVJPEG to allocate memory ahead for the biggest image in the data set to avoid reallocations in runtime\n",
" preallocate_width_hint = 5980 if decoder_device == 'mixed' else 0\n",
" preallocate_height_hint = 6430 if decoder_device == 'mixed' else 0\n",
" if is_training:\n",
" images = fn.decoders.image_random_crop(images,\n",
" device=decoder_device, output_type=types.RGB,\n",
" device_memory_padding=device_memory_padding,\n",
" host_memory_padding=host_memory_padding,\n",
" preallocate_width_hint=preallocate_width_hint,\n",
" preallocate_height_hint=preallocate_height_hint,\n",
" random_aspect_ratio=[0.8, 1.25],\n",
6 changes: 0 additions & 6 deletions docs/examples/use_cases/paddle/resnet50/dali.py
@@ -51,10 +51,6 @@ def create_dali_pipeline(data_dir, ops_meta, shard_id, num_shards, dali_cpu=Fals
pad_last_batch=True,
name="Reader")
decoder_device = 'cpu' if dali_cpu else 'mixed'
# ask nvJPEG to preallocate memory for the biggest sample in ImageNet for CPU and
# GPU to avoid reallocations in runtime
device_memory_padding = 211025920 if decoder_device == 'mixed' else 0
host_memory_padding = 140544512 if decoder_device == 'mixed' else 0
# ask HW nvJPEG to allocate memory ahead for the biggest image in the data set to
# avoid reallocations in runtime
preallocate_width_hint = 5980 if decoder_device == 'mixed' else 0
@@ -63,8 +59,6 @@ def create_dali_pipeline(data_dir, ops_meta, shard_id, num_shards, dali_cpu=Fals
images = fn.decoders.image_random_crop(images,
device=decoder_device,
output_type=types.RGB,
device_memory_padding=device_memory_padding,
host_memory_padding=host_memory_padding,
preallocate_width_hint=preallocate_width_hint,
preallocate_height_hint=preallocate_height_hint,
random_aspect_ratio=[
@@ -1,4 +1,4 @@
# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -35,11 +35,7 @@ def training_pipe(data_dir, interpolation, image_size, output_layout, automatic_
decoder_device = "cpu"
resize_device = "cpu"

# This padding sets the size of the internal nvJPEG buffers to be able to handle all images
# from full-sized ImageNet without additional reallocations
images = fn.decoders.image_random_crop(jpegs, device=decoder_device, output_type=types.RGB,
device_memory_padding=211025920,
host_memory_padding=140544512,
random_aspect_ratio=[0.75, 4.0 / 3.0],
random_area=[0.08, 1.0])

5 changes: 0 additions & 5 deletions docs/examples/use_cases/pytorch/resnet50/main.py
@@ -118,17 +118,12 @@ def create_dali_pipeline(data_dir, crop, size, shard_id, num_shards, dali_cpu=Fa
name="Reader")
dali_device = 'cpu' if dali_cpu else 'gpu'
decoder_device = 'cpu' if dali_cpu else 'mixed'
# ask nvJPEG to preallocate memory for the biggest sample in ImageNet for CPU and GPU to avoid reallocations in runtime
device_memory_padding = 211025920 if decoder_device == 'mixed' else 0
host_memory_padding = 140544512 if decoder_device == 'mixed' else 0
# ask HW NVJPEG to allocate memory ahead for the biggest image in the data set to avoid reallocations in runtime
preallocate_width_hint = 5980 if decoder_device == 'mixed' else 0
preallocate_height_hint = 6430 if decoder_device == 'mixed' else 0
if is_training:
images = fn.decoders.image_random_crop(images,
device=decoder_device, output_type=types.RGB,
device_memory_padding=device_memory_padding,
host_memory_padding=host_memory_padding,
preallocate_width_hint=preallocate_width_hint,
preallocate_height_hint=preallocate_height_hint,
random_aspect_ratio=[0.8, 1.25],
6 changes: 6 additions & 0 deletions qa/TL3_RN50_convergence/test_mxnet.sh
@@ -5,6 +5,12 @@ min_perf=10000

NUM_GPUS=`nvidia-smi -L | wc -l`

# Disable memory padding to avoid OOM errors
sed -i -e "s/'--dali-nvjpeg-memory-padding', int, 16/'--dali-nvjpeg-memory-padding', int, 0/g" /opt/mxnet/python/mxnet/io/dali_utils.py

# TODO(janton): Remove the LD_PRELOAD with the next nvImageCodec release
# Make sure that nvImageCodec's libjpeg symbols load before MXNet to avoid a libjpeg version mismatch
LD_PRELOAD=$(ls /usr/local/lib/python3.10/dist-packages/nvidia/nvimgcodec/extensions/libjpeg_turbo_ext.so*) \
python /opt/mxnet/example/image-classification/train_imagenet_runner \
--data-root=/data/imagenet/train-val-recordio-passthrough/ -b 144 \
-n $NUM_GPUS --seed 42 2>&1 | tee dali.log
5 changes: 5 additions & 0 deletions qa/TL3_RN50_short/test_mxnet.sh
@@ -5,6 +5,11 @@ min_perf=4000

NUM_GPUS=`nvidia-smi -L | wc -l`

# Disable memory padding to avoid OOM errors
sed -i -e "s/'--dali-nvjpeg-memory-padding', int, 16/'--dali-nvjpeg-memory-padding', int, 0/g" /opt/mxnet/python/mxnet/io/dali_utils.py
# Make sure that nvImageCodec loads before MXNet to avoid a libjpeg version mismatch
sed -i -e "s/import nvidia.dali.fn as fn/import nvidia.dali.fn as fn\nfrom nvidia import nvimgcodec/g" /opt/mxnet/python/mxnet/io/dali_utils.py

python /opt/mxnet/example/image-classification/train_imagenet_runner \
--data-root=/data/imagenet/train-val-recordio-passthrough/ -b 408 \
-n $NUM_GPUS -e 5 --seed 42 --dali-threads 8 2>&1 | tee dali.log
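
Both test scripts work around the same libjpeg symbol clash in different ways: the convergence test preloads nvImageCodec's libjpeg-turbo extension through LD_PRELOAD, while this short test patches dali_utils.py so that nvImageCodec is imported before MXNet. A minimal, hypothetical Python sketch of the resulting import order follows; only the nvimgcodec import line comes from the sed patch itself, the rest is assumed for illustration.

# Sketch of the import-order workaround injected by the sed patch above.
import nvidia.dali.fn as fn     # DALI import already present in dali_utils.py
from nvidia import nvimgcodec   # injected line: loads nvImageCodec (and its libjpeg-turbo) first
import mxnet as mx              # per the commit, importing MXNet afterwards avoids the
                                # libjpeg version mismatch

print(mx.__version__)
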