Skip to content

Commit

Permalink
polish based on last PR review
Browse files Browse the repository at this point in the history
Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com>
  • Loading branch information
Titus-von-Koeller and SunMarc committed Aug 27, 2024
1 parent bb3ba4a commit 463c211
Show file tree
Hide file tree
Showing 8 changed files with 124 additions and 25 deletions.
2 changes: 2 additions & 0 deletions src/transformers/integrations/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@
"run_hp_search_ray",
"run_hp_search_sigopt",
"run_hp_search_wandb",
"validate_bnb_backend_availability",
],
"peft": ["PeftAdapterMixin"],
"quanto": ["replace_with_quanto_layers"],
Expand Down Expand Up @@ -175,6 +176,7 @@
run_hp_search_ray,
run_hp_search_sigopt,
run_hp_search_wandb,
validate_bnb_backend_availability,
)
from .peft import PeftAdapterMixin
from .quanto import replace_with_quanto_layers
Expand Down
6 changes: 5 additions & 1 deletion src/transformers/integrations/bitsandbytes.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,11 @@

from packaging import version

from ..utils import is_accelerate_available, is_bitsandbytes_available, logging
from ..utils import (
is_accelerate_available,
is_bitsandbytes_available,
logging,
)


if is_bitsandbytes_available():
Expand Down
63 changes: 63 additions & 0 deletions src/transformers/integrations/integration_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,9 @@
from ..utils import (
PushToHubMixin,
flatten_dict,
get_available_devices,
is_datasets_available,
is_ipex_available,
is_pandas_available,
is_tf_available,
is_torch_available,
Expand Down Expand Up @@ -204,6 +206,67 @@ def is_dvclive_available():
return importlib.util.find_spec("dvclive") is not None


def _validate_bnb_multi_backend_availability(raise_exception):
    """
    Check whether any torch device available on this machine is supported by the installed
    multi-backend build of bitsandbytes.

    Args:
        raise_exception (`bool`):
            If `True`, raise a `RuntimeError` (after logging an error) when no available device is
            supported; otherwise log a warning and return `False`.

    Returns:
        `bool`: `True` when at least one available device is supported by bitsandbytes.
    """
    import bitsandbytes as bnb

    bnb_supported_devices = getattr(bnb, "supported_torch_devices", set())
    # Copy the result: `get_available_devices()` is `lru_cache`-d, so mutating its return value
    # directly (the `discard` below) would corrupt the cached set for every later caller.
    available_devices = set(get_available_devices())

    if available_devices == {"cpu"} and not is_ipex_available():
        from importlib.util import find_spec

        # IPEX is installed but `is_ipex_available()` is False: most likely a version mismatch
        # with the installed PyTorch, so warn rather than silently dropping CPU support.
        if find_spec("intel_extension_for_pytorch"):
            logger.warning(
                "You have Intel IPEX installed but if you're intending to use it for CPU, it might not have the right version. Be sure to double check that your PyTorch and IPEX installs are compatible."
            )

        available_devices.discard("cpu")  # Only Intel CPU is supported by BNB at the moment

    if not available_devices.intersection(bnb_supported_devices):
        if raise_exception:
            # Annotate the "cpu" entry so the error message explains its extra requirements.
            bnb_supported_devices_with_info = {
                '"cpu" (needs an Intel CPU and intel_extension_for_pytorch installed and compatible with the PyTorch version)'
                if device == "cpu"
                else device
                for device in bnb_supported_devices
            }
            err_msg = f"None of the available devices `available_devices = {available_devices or None}` are supported by the bitsandbytes version you have installed: `bnb_supported_devices = {bnb_supported_devices_with_info}`. Please check the docs to see if the backend you intend to use is available and how to install it: https://huggingface.co/docs/bitsandbytes/main/en/installation#multi-backend"

            logger.error(err_msg)
            raise RuntimeError(err_msg)

        logger.warning("No supported devices found for bitsandbytes multi-backend.")
        return False

    logger.debug("Multi-backend validation successful.")
    return True


def _validate_bnb_cuda_backend_availability(raise_exception):
    """
    Check that CUDA is available, as required by the CUDA-only (non-multi-backend) build of
    bitsandbytes.

    Args:
        raise_exception (`bool`):
            If `True`, raise a `RuntimeError` (after logging an error) when CUDA is unavailable;
            otherwise log a warning and return `False`.

    Returns:
        `bool`: `True` when CUDA is available.
    """
    # Success path first: nothing else to check for the CUDA-only build.
    if torch.cuda.is_available():
        logger.debug("CUDA backend validation successful.")
        return True

    log_msg = "CUDA is required but not available for bitsandbytes. Please consider installing the multi-platform enabled version of bitsandbytes, which is currently a work in progress. Please check currently supported platforms and installation instructions at https://huggingface.co/docs/bitsandbytes/main/en/installation#multi-backend"
    if raise_exception:
        logger.error(log_msg)
        raise RuntimeError(log_msg)

    logger.warning(log_msg)
    return False


def validate_bnb_backend_availability(raise_exception=False):
    """
    Validates if the available devices are supported by bitsandbytes, optionally raising an exception if not.
    """
    import bitsandbytes as bnb

    # Multi-backend builds advertise themselves via the `features` attribute; older CUDA-only
    # builds do not have it, hence the `getattr` default.
    is_multi_backend = "multi_backend" in getattr(bnb, "features", set())
    validator = (
        _validate_bnb_multi_backend_availability if is_multi_backend else _validate_bnb_cuda_backend_availability
    )
    return validator(raise_exception)


def hp_params(trial):
if is_optuna_available():
import optuna
Expand Down
14 changes: 5 additions & 9 deletions src/transformers/quantizers/quantizer_bnb_4bit.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def __init__(self, quantization_config, **kwargs):
self.modules_to_not_convert = self.quantization_config.llm_int8_skip_modules

def validate_environment(self, *args, **kwargs):
if not (is_accelerate_available() and is_bitsandbytes_available()):
if not is_accelerate_available():
raise ImportError(
f"Using `bitsandbytes` 4-bit quantization requires Accelerate: `pip install 'accelerate>={ACCELERATE_MIN_VERSION}'`"
)
Expand All @@ -75,15 +75,11 @@ def validate_environment(self, *args, **kwargs):
)
import bitsandbytes as bnb

bnb_is_multibackend_enabled = "multi_backend" in getattr(bnb, "features", set())
bnb_multibackend_is_enabled = "multi_backend" in getattr(bnb, "features", set())

if not torch.cuda.is_available():
import bitsandbytes as bnb
from ..integrations.integration_utils import validate_bnb_backend_availability

if not bnb_is_multibackend_enabled:
raise RuntimeError(
"Current bitsandbytes (`main`) only supports CUDA, please switch to the `multi-backend-refactor` preview release for WIP support of other backends."
)
validate_bnb_backend_availability(raise_exception=True)

if kwargs.get("from_tf", False) or kwargs.get("from_flax", False):
raise ValueError(
Expand All @@ -100,7 +96,7 @@ def validate_environment(self, *args, **kwargs):
device_map_without_lm_head = {
key: device_map[key] for key in device_map.keys() if key not in self.modules_to_not_convert
}
if set(device_map.values()) == {"cpu"} and bnb_is_multibackend_enabled:
if set(device_map.values()) == {"cpu"} and bnb_multibackend_is_enabled:
pass
elif "cpu" in device_map_without_lm_head.values() or "disk" in device_map_without_lm_head.values():
raise ValueError(
Expand Down
14 changes: 5 additions & 9 deletions src/transformers/quantizers/quantizer_bnb_8bit.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def __init__(self, quantization_config, **kwargs):
self.modules_to_not_convert = self.quantization_config.llm_int8_skip_modules

def validate_environment(self, *args, **kwargs):
if not (is_accelerate_available() and is_bitsandbytes_available()):
if not is_accelerate_available():
raise ImportError(
f"Using `bitsandbytes` 8-bit quantization requires Accelerate: `pip install 'accelerate>={ACCELERATE_MIN_VERSION}'`"
)
Expand All @@ -75,15 +75,11 @@ def validate_environment(self, *args, **kwargs):
)
import bitsandbytes as bnb

bnb_is_multibackend_enabled = "multi_backend" in getattr(bnb, "features", set())
bnb_multibackend_is_enabled = "multi_backend" in getattr(bnb, "features", set())

if not torch.cuda.is_available():
import bitsandbytes as bnb
from ..integrations.integration_utils import validate_bnb_backend_availability

if not bnb_is_multibackend_enabled:
raise RuntimeError(
"Current bitsandbytes (`main`) only supports CUDA, please switch to the `multi-backend-refactor` preview release for WIP support of other backends."
)
validate_bnb_backend_availability(raise_exception=True)

if kwargs.get("from_tf", False) or kwargs.get("from_flax", False):
raise ValueError(
Expand All @@ -100,7 +96,7 @@ def validate_environment(self, *args, **kwargs):
device_map_without_lm_head = {
key: device_map[key] for key in device_map.keys() if key not in self.modules_to_not_convert
}
if set(device_map.values()) == {"cpu"} and bnb_is_multibackend_enabled:
if set(device_map.values()) == {"cpu"} and bnb_multibackend_is_enabled:
pass
elif "cpu" in device_map_without_lm_head.values() or "disk" in device_map_without_lm_head.values():
raise ValueError(
Expand Down
15 changes: 12 additions & 3 deletions src/transformers/testing_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,17 @@ def parse_int_from_env(key, default=None):
_run_third_party_device_tests = parse_flag_from_env("RUN_THIRD_PARTY_DEVICE_TESTS", default=False)


def get_device_count():
    """Return the number of available accelerator devices: XPU count when XPU is available, CUDA count otherwise."""
    import torch

    if is_torch_xpu_available():
        return torch.xpu.device_count()
    return torch.cuda.device_count()


def is_pt_tf_cross_test(test_case):
"""
Decorator marking a test as a test that control interactions between PyTorch and TensorFlow.
Expand Down Expand Up @@ -748,9 +759,7 @@ def require_torch_multi_gpu(test_case):
if not is_torch_available():
return unittest.skip(reason="test requires PyTorch")(test_case)

import torch

device_count = torch.cuda.device_count() if not is_torch_xpu_available() else torch.xpu.device_count()
device_count = get_device_count()

return unittest.skipUnless(device_count > 1, "test requires multiple GPUs")(test_case)

Expand Down
29 changes: 29 additions & 0 deletions src/transformers/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from functools import lru_cache

from huggingface_hub import get_full_repo_name # for backward compatibility
from huggingface_hub.constants import HF_HUB_DISABLE_TELEMETRY as DISABLE_TELEMETRY # for backward compatibility
from packaging import version
Expand Down Expand Up @@ -274,3 +276,30 @@ def check_min_version(min_version):
+ "Check out https://github.com/huggingface/transformers/tree/main/examples#important-note for the examples corresponding to other "
"versions of HuggingFace Transformers."
)

@lru_cache()
def get_available_devices():
    """
    Returns a set of devices available for the current PyTorch installation.
    """
    # Pair each optional device type with its availability check; `cpu` is always
    # supported as a device in PyTorch, so it is added unconditionally below.
    device_checks = (
        ("cuda", is_torch_cuda_available),
        ("mps", is_torch_mps_available),
        ("xpu", is_torch_xpu_available),
        ("npu", is_torch_npu_available),
        ("mlu", is_torch_mlu_available),
        ("musa", is_torch_musa_available),
    )
    return {"cpu"} | {device for device, is_available in device_checks if is_available()}
6 changes: 3 additions & 3 deletions src/transformers/utils/import_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -840,11 +840,11 @@ def is_torch_xpu_available(check_device=False):
return hasattr(torch, "xpu") and torch.xpu.is_available()


@lru_cache()
def is_bitsandbytes_available():
    """
    Return `True` when torch is installed and the installed bitsandbytes build has a backend
    usable on this machine (validated without raising).

    NOTE(review): the source span contains merged diff residue — both the old
    `return _bitsandbytes_available` and the new backend-validated return were present, leaving
    the second `return` unreachable. This keeps only the backend-validated path.
    """
    if not is_torch_available():
        return False

    # Local import to avoid a circular import between `utils` and `integrations`.
    from transformers.integrations.integration_utils import validate_bnb_backend_availability

    return validate_bnb_backend_availability(raise_exception=False)


def is_flash_attn_2_available():
Expand Down

0 comments on commit 463c211

Please sign in to comment.