feat(python): add support for trust_remote_code in hf embeddings (#1712)
Resolves #1709. Adds `trust_remote_code` as a parameter to the `TransformersEmbeddingFunction` class, with a default of `False`. Updates the relevant documentation accordingly.
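For reviewers, a minimal usage sketch of the new flag through the embedding registry. The registry key `"huggingface"` and the `create()` call follow LanceDB's existing embedding-registry pattern rather than anything introduced by this PR, and the model name is only a placeholder.

```python
from lancedb.embeddings import get_registry

# Sketch: opt in to running model code from the Hub for a repository you trust.
# Omitting the argument keeps the new default, trust_remote_code=False.
func = get_registry().get("huggingface").create(
    name="colbert-ir/colbertv2.0",  # placeholder model name
    trust_remote_code=True,
)
```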
@@ -1,5 +1,5 @@
 # Huggingface embedding models
-We offer support for all huggingface models (which can be loaded via [transformers](https://huggingface.co/docs/transformers/en/index) library). The default model is `colbert-ir/colbertv2.0` which also has its own special callout - `registry.get("colbert")`
+We offer support for all Hugging Face models (which can be loaded via [transformers](https://huggingface.co/docs/transformers/en/index) library). The default model is `colbert-ir/colbertv2.0` which also has its own special callout - `registry.get("colbert")`. Some Hugging Face models might require custom models defined on the HuggingFace Hub in their own modeling files. You may enable this by setting `trust_remote_code=True`. This option should only be set to True for repositories you trust and in which you have read the code, as it will execute code present on the Hub on your local machine.
 
 Example usage -
 ```python
@@ -40,6 +40,11 @@ class TransformersEmbeddingFunction(EmbeddingFunction):
         The device to use for the model. Default is "cpu".
     show_progress_bar : bool
         Whether to show a progress bar when loading the model. Default is True.
+    trust_remote_code : bool
+        Whether or not to allow for custom models defined on the HuggingFace
+        Hub in their own modeling files. This option should only be set to True
+        for repositories you trust and in which you have read the code, as it
+        will execute code present on the Hub on your local machine.
 
     to download package, run :
     `pip install transformers`
@@ -49,6 +54,7 @@ class TransformersEmbeddingFunction(EmbeddingFunction):
 
     name: str = "colbert-ir/colbertv2.0"
    device: str = "cpu"
+    trust_remote_code: bool = False
     _tokenizer: Any = PrivateAttr()
     _model: Any = PrivateAttr()
 
@@ -57,7 +63,9 @@ class TransformersEmbeddingFunction(EmbeddingFunction):
         self._ndims = None
         transformers = attempt_import_or_raise("transformers")
         self._tokenizer = transformers.AutoTokenizer.from_pretrained(self.name)
-        self._model = transformers.AutoModel.from_pretrained(self.name)
+        self._model = transformers.AutoModel.from_pretrained(
+            self.name, trust_remote_code=self.trust_remote_code
+        )
         self._model.to(self.device)
 
         if PYDANTIC_VERSION.major < 2:  # Pydantic 1.x compat