Compare commits

...

7 Commits

Author SHA1 Message Date
Chang She
d1b0c8f0fe feat(python): enable polars predict pushdown 2024-02-03 19:33:45 -08:00
QianZhu
e412194008 fix hybrid search example (#922) 2024-02-03 09:26:32 +05:30
Lance Release
a9088224c5 [python] Bump version: 0.5.2 → 0.5.3 2024-02-03 03:04:04 +00:00
Ayush Chaurasia
688c57a0d8 fix: revert safe_import_pandas usage (#921) 2024-02-02 18:57:13 -08:00
Lance Release
12a98deded Updating package-lock.json 2024-02-02 22:37:23 +00:00
Lance Release
e4bb042918 Updating package-lock.json 2024-02-02 21:57:07 +00:00
Lance Release
04e1662681 Bump version: 0.4.7 → 0.4.8 2024-02-02 21:56:57 +00:00
14 changed files with 80 additions and 53 deletions

View File

@@ -1,5 +1,5 @@
[bumpversion] [bumpversion]
current_version = 0.4.7 current_version = 0.4.8
commit = True commit = True
message = Bump version: {current_version} → {new_version} message = Bump version: {current_version} → {new_version}
tag = True tag = True

View File

@@ -6,17 +6,24 @@ LanceDB supports both semantic and keyword-based search. In real world applicati
You can perform hybrid search in LanceDB by combining the results of semantic and full-text search via a reranking algorithm of your choice. LanceDB provides multiple rerankers out of the box. However, you can always write a custom reranker if your use case needs more sophisticated logic. You can perform hybrid search in LanceDB by combining the results of semantic and full-text search via a reranking algorithm of your choice. LanceDB provides multiple rerankers out of the box. However, you can always write a custom reranker if your use case needs more sophisticated logic.
```python ```python
import os
import lancedb import lancedb
import openai
from lancedb.embeddings import get_registry from lancedb.embeddings import get_registry
from lancedb.pydanatic import LanceModel, Vector from lancedb.pydantic import LanceModel, Vector
db = lancedb.connect("~/.lancedb") db = lancedb.connect("~/.lancedb")
# Ingest embedding function in LanceDB table # Ingest embedding function in LanceDB table
# Configuring the environment variable OPENAI_API_KEY
if "OPENAI_API_KEY" not in os.environ:
# OR set the key here as a variable
openai.api_key = "sk-..."
embeddings = get_registry().get("openai").create() embeddings = get_registry().get("openai").create()
class Documents(LanceModel): class Documents(LanceModel):
vector: Vector(embeddings.ndims) = embeddings.VectorField() vector: Vector(embeddings.ndims()) = embeddings.VectorField()
text: str = embeddings.SourceField() text: str = embeddings.SourceField()
table = db.create_table("documents", schema=Documents) table = db.create_table("documents", schema=Documents)
@@ -31,17 +38,19 @@ data = [
# ingest docs with auto-vectorization # ingest docs with auto-vectorization
table.add(data) table.add(data)
# Create a fts index before the hybrid search
table.create_fts_index("text")
# hybrid search with default re-ranker # hybrid search with default re-ranker
results = table.search("flower moon", query_type="hybrid").to_pandas() results = table.search("flower moon", query_type="hybrid").to_pandas()
``` ```
By default, LanceDB uses `LinearCombinationReranker(weights=0.7)` to combine and rerank the results of semantic and full-text search. You can customize the hyperparameters as needed or write your own custom reranker. Here's how you can use any of the available rerankers: By default, LanceDB uses `LinearCombinationReranker(weight=0.7)` to combine and rerank the results of semantic and full-text search. You can customize the hyperparameters as needed or write your own custom reranker. Here's how you can use any of the available rerankers:
### `rerank()` arguments ### `rerank()` arguments
* `normalize`: `str`, default `"score"`: * `normalize`: `str`, default `"score"`:
The method to normalize the scores. Can be "rank" or "score". If "rank", the scores are converted to ranks and then normalized. If "score", the scores are normalized directly. The method to normalize the scores. Can be "rank" or "score". If "rank", the scores are converted to ranks and then normalized. If "score", the scores are normalized directly.
* `reranker`: `Reranker`, default `LinearCombinationReranker(weights=0.7)`. * `reranker`: `Reranker`, default `LinearCombinationReranker(weight=0.7)`.
The reranker to use. If not specified, the default reranker is used. The reranker to use. If not specified, the default reranker is used.
@@ -55,7 +64,7 @@ This is the default re-ranker used by LanceDB. It combines the results of semant
```python ```python
from lancedb.rerankers import LinearCombinationReranker from lancedb.rerankers import LinearCombinationReranker
reranker = LinearCombinationReranker(weights=0.3) # Use 0.3 as the weight for vector search reranker = LinearCombinationReranker(weight=0.3) # Use 0.3 as the weight for vector search
results = table.search("rebel", query_type="hybrid").rerank(reranker=reranker).to_pandas() results = table.search("rebel", query_type="hybrid").rerank(reranker=reranker).to_pandas()
``` ```

44
node/package-lock.json generated
View File

@@ -1,12 +1,12 @@
{ {
"name": "vectordb", "name": "vectordb",
"version": "0.4.7", "version": "0.4.8",
"lockfileVersion": 3, "lockfileVersion": 3,
"requires": true, "requires": true,
"packages": { "packages": {
"": { "": {
"name": "vectordb", "name": "vectordb",
"version": "0.4.7", "version": "0.4.8",
"cpu": [ "cpu": [
"x64", "x64",
"arm64" "arm64"
@@ -53,11 +53,11 @@
"uuid": "^9.0.0" "uuid": "^9.0.0"
}, },
"optionalDependencies": { "optionalDependencies": {
"@lancedb/vectordb-darwin-arm64": "0.4.7", "@lancedb/vectordb-darwin-arm64": "0.4.8",
"@lancedb/vectordb-darwin-x64": "0.4.7", "@lancedb/vectordb-darwin-x64": "0.4.8",
"@lancedb/vectordb-linux-arm64-gnu": "0.4.7", "@lancedb/vectordb-linux-arm64-gnu": "0.4.8",
"@lancedb/vectordb-linux-x64-gnu": "0.4.7", "@lancedb/vectordb-linux-x64-gnu": "0.4.8",
"@lancedb/vectordb-win32-x64-msvc": "0.4.7" "@lancedb/vectordb-win32-x64-msvc": "0.4.8"
} }
}, },
"node_modules/@75lb/deep-merge": { "node_modules/@75lb/deep-merge": {
@@ -329,9 +329,9 @@
} }
}, },
"node_modules/@lancedb/vectordb-darwin-arm64": { "node_modules/@lancedb/vectordb-darwin-arm64": {
"version": "0.4.7", "version": "0.4.8",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.4.7.tgz", "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.4.8.tgz",
"integrity": "sha512-kACOIytgjBfX8NRwjPKe311XRN3lbSN13B7avT5htMd3kYm3AnnMag9tZhlwoO7lIuvGaXhy7mApygJrjhfJ4g==", "integrity": "sha512-FpnJaw7KmNdD/FtOw9AcmPL5P+L04AcnfPj9ZyEjN8iCwB/qaOGYgdfBv+EbEtfHIsqA12q/1BRduu9KdB6BIA==",
"cpu": [ "cpu": [
"arm64" "arm64"
], ],
@@ -341,9 +341,9 @@
] ]
}, },
"node_modules/@lancedb/vectordb-darwin-x64": { "node_modules/@lancedb/vectordb-darwin-x64": {
"version": "0.4.7", "version": "0.4.8",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.4.7.tgz", "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.4.8.tgz",
"integrity": "sha512-vb74iK5uPWCwz5E60r3yWp/R/HSg54/Z9AZWYckYXqsPv4w/nfbkM5iZhfRqqR/9uE6JClWJKOtjbk7b8CFRFg==", "integrity": "sha512-RafOEYyZIgphp8wPGuVLFaTc8aAqo0NCO1LQMx0mB0xV96vrdo0Mooivs+dYN3RFfSHtTKPw9O1Jc957Vp1TLg==",
"cpu": [ "cpu": [
"x64" "x64"
], ],
@@ -353,9 +353,9 @@
] ]
}, },
"node_modules/@lancedb/vectordb-linux-arm64-gnu": { "node_modules/@lancedb/vectordb-linux-arm64-gnu": {
"version": "0.4.7", "version": "0.4.8",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.4.7.tgz", "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.4.8.tgz",
"integrity": "sha512-jHp7THm6S9sB8RaCxGoZXLAwGAUHnawUUilB1K3mvQsRdfB2bBs0f7wDehW+PDhr+Iog4LshaWbcnoQEUJWR+Q==", "integrity": "sha512-WlbYNfj4+v1hBHUluF+hnlG/A0ZaQFdXBTGDfHQniL11o+n3emWm4ujP5nSAoQHXjSH9DaOTGr/N4Mc9Xe+luw==",
"cpu": [ "cpu": [
"arm64" "arm64"
], ],
@@ -365,9 +365,9 @@
] ]
}, },
"node_modules/@lancedb/vectordb-linux-x64-gnu": { "node_modules/@lancedb/vectordb-linux-x64-gnu": {
"version": "0.4.7", "version": "0.4.8",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.4.7.tgz", "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.4.8.tgz",
"integrity": "sha512-LKbVe6Wrp/AGqCCjKliNDmYoeTNgY/wfb2DTLjrx41Jko/04ywLrJ6xSEAn3XD5RDCO5u3fyUdXHHHv5a3VAAQ==", "integrity": "sha512-z+qFJrDqnNEv4JcwYDyt51PHmWjuM/XaOlSjpBnyyuUImeY+QcwctMuyXt8+Q4zhuqQR1AhLKrMwCU+YmMfk5g==",
"cpu": [ "cpu": [
"x64" "x64"
], ],
@@ -377,9 +377,9 @@
] ]
}, },
"node_modules/@lancedb/vectordb-win32-x64-msvc": { "node_modules/@lancedb/vectordb-win32-x64-msvc": {
"version": "0.4.7", "version": "0.4.8",
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.4.7.tgz", "resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.4.8.tgz",
"integrity": "sha512-C5ln4+wafeY1Sm4PeV0Ios9lUaQVVip5Mjl9XU7ngioSEMEuXI/XMVfIdVfDPppVNXPeQxg33wLA272uw88D1Q==", "integrity": "sha512-VjUryVvEA04r0j4lU9pJy84cmjuQm1GhBzbPc8kwbn5voT4A6BPglrlNsU0Zc+j8Fbjyvauzw2lMEcMsF4F0rw==",
"cpu": [ "cpu": [
"x64" "x64"
], ],

View File

@@ -1,6 +1,6 @@
{ {
"name": "vectordb", "name": "vectordb",
"version": "0.4.7", "version": "0.4.8",
"description": " Serverless, low-latency vector database for AI applications", "description": " Serverless, low-latency vector database for AI applications",
"main": "dist/index.js", "main": "dist/index.js",
"types": "dist/index.d.ts", "types": "dist/index.d.ts",
@@ -85,10 +85,10 @@
} }
}, },
"optionalDependencies": { "optionalDependencies": {
"@lancedb/vectordb-darwin-arm64": "0.4.7", "@lancedb/vectordb-darwin-arm64": "0.4.8",
"@lancedb/vectordb-darwin-x64": "0.4.7", "@lancedb/vectordb-darwin-x64": "0.4.8",
"@lancedb/vectordb-linux-arm64-gnu": "0.4.7", "@lancedb/vectordb-linux-arm64-gnu": "0.4.8",
"@lancedb/vectordb-linux-x64-gnu": "0.4.7", "@lancedb/vectordb-linux-x64-gnu": "0.4.8",
"@lancedb/vectordb-win32-x64-msvc": "0.4.7" "@lancedb/vectordb-win32-x64-msvc": "0.4.8"
} }
} }

View File

@@ -1,5 +1,5 @@
[bumpversion] [bumpversion]
current_version = 0.5.2 current_version = 0.5.3
commit = True commit = True
message = [python] Bump version: {current_version} → {new_version} message = [python] Bump version: {current_version} → {new_version}
tag = True tag = True

View File

@@ -16,9 +16,9 @@ from typing import Iterable, List, Union
import numpy as np import numpy as np
import pyarrow as pa import pyarrow as pa
from .util import safe_import from .util import safe_import_pandas
pd = safe_import("pandas") pd = safe_import_pandas()
DATA = Union[List[dict], dict, "pd.DataFrame", pa.Table, Iterable[pa.RecordBatch]] DATA = Union[List[dict], dict, "pd.DataFrame", pa.Table, Iterable[pa.RecordBatch]]
VEC = Union[list, np.ndarray, pa.Array, pa.ChunkedArray] VEC = Union[list, np.ndarray, pa.Array, pa.ChunkedArray]

View File

@@ -16,9 +16,9 @@ import deprecation
from . import __version__ from . import __version__
from .exceptions import MissingColumnError, MissingValueError from .exceptions import MissingColumnError, MissingValueError
from .util import safe_import from .util import safe_import_pandas
pd = safe_import("pandas") pd = safe_import_pandas()
def contextualize(raw_df: "pd.DataFrame") -> Contextualizer: def contextualize(raw_df: "pd.DataFrame") -> Contextualizer:

View File

@@ -26,10 +26,10 @@ import pyarrow as pa
from lance.vector import vec_to_table from lance.vector import vec_to_table
from retry import retry from retry import retry
from ..util import safe_import from ..util import safe_import_pandas
from ..utils.general import LOGGER from ..utils.general import LOGGER
pd = safe_import("pandas") pd = safe_import_pandas()
DATA = Union[pa.Table, "pd.DataFrame"] DATA = Union[pa.Table, "pd.DataFrame"]
TEXT = Union[str, List[str], pa.Array, pa.ChunkedArray, np.ndarray] TEXT = Union[str, List[str], pa.Array, pa.ChunkedArray, np.ndarray]

View File

@@ -27,7 +27,7 @@ from . import __version__
from .common import VEC, VECTOR_COLUMN_NAME from .common import VEC, VECTOR_COLUMN_NAME
from .rerankers.base import Reranker from .rerankers.base import Reranker
from .rerankers.linear_combination import LinearCombinationReranker from .rerankers.linear_combination import LinearCombinationReranker
from .util import safe_import from .util import safe_import_pandas
if TYPE_CHECKING: if TYPE_CHECKING:
import PIL import PIL
@@ -36,7 +36,7 @@ if TYPE_CHECKING:
from .pydantic import LanceModel from .pydantic import LanceModel
from .table import Table from .table import Table
pd = safe_import("pandas") pd = safe_import_pandas()
class Query(pydantic.BaseModel): class Query(pydantic.BaseModel):

View File

@@ -34,7 +34,8 @@ from .query import LanceQueryBuilder, Query
from .util import ( from .util import (
fs_from_uri, fs_from_uri,
join_uri, join_uri,
safe_import, safe_import_pandas,
safe_import_polars,
value_to_sql, value_to_sql,
) )
from .utils.events import register_event from .utils.events import register_event
@@ -48,8 +49,8 @@ if TYPE_CHECKING:
from .db import LanceDBConnection from .db import LanceDBConnection
pd = safe_import("pandas") pd = safe_import_pandas()
pl = safe_import("polars") pl = safe_import_polars()
def _sanitize_data( def _sanitize_data(
@@ -835,9 +836,7 @@ class LanceTable(Table):
------- -------
pl.LazyFrame pl.LazyFrame
""" """
return pl.scan_pyarrow_dataset( return pl.scan_pyarrow_dataset(self.to_lance(), batch_size=batch_size)
self.to_lance(), allow_pyarrow_filter=False, batch_size=batch_size
)
@property @property
def _dataset_uri(self) -> str: def _dataset_uri(self) -> str:

View File

@@ -134,6 +134,24 @@ def safe_import(module: str, mitigation=None):
raise ImportError(f"Please install {mitigation or module}") raise ImportError(f"Please install {mitigation or module}")
def safe_import_pandas():
    """Return the ``pandas`` module if it can be imported, else ``None``.

    pandas is treated as an optional dependency here: a failed import is
    reported by returning ``None`` instead of raising, so callers must
    check the result before using it.

    Returns
    -------
    module or None
        The imported ``pandas`` module, or ``None`` when importing it
        raises ``ImportError``.
    """
    # Keep the try body limited to the one statement that can fail.
    try:
        import pandas as pd
    except ImportError:
        return None
    return pd
def safe_import_polars():
    """Return the ``polars`` module if it can be imported, else ``None``.

    polars is treated as an optional dependency here: a failed import is
    reported by returning ``None`` instead of raising, so callers must
    check the result before using it.

    Returns
    -------
    module or None
        The imported ``polars`` module, or ``None`` when importing it
        raises ``ImportError``.
    """
    # Keep the try body limited to the one statement that can fail.
    try:
        import polars as pl
    except ImportError:
        return None
    return pl
@singledispatch @singledispatch
def value_to_sql(value): def value_to_sql(value):
raise NotImplementedError("SQL conversion is not implemented for this type") raise NotImplementedError("SQL conversion is not implemented for this type")

View File

@@ -1,6 +1,6 @@
[project] [project]
name = "lancedb" name = "lancedb"
version = "0.5.2" version = "0.5.3"
dependencies = [ dependencies = [
"deprecation", "deprecation",
"pylance==0.9.12", "pylance==0.9.12",
@@ -14,7 +14,8 @@ dependencies = [
"pyyaml>=6.0", "pyyaml>=6.0",
"click>=8.1.7", "click>=8.1.7",
"requests>=2.31.0", "requests>=2.31.0",
"overrides>=0.7" "overrides>=0.7",
"pyarrow>=14.0"
] ]
description = "lancedb" description = "lancedb"
authors = [{ name = "LanceDB Devs", email = "dev@lancedb.com" }] authors = [{ name = "LanceDB Devs", email = "dev@lancedb.com" }]

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "vectordb-node" name = "vectordb-node"
version = "0.4.7" version = "0.4.8"
description = "Serverless, low-latency vector database for AI applications" description = "Serverless, low-latency vector database for AI applications"
license = "Apache-2.0" license = "Apache-2.0"
edition = "2018" edition = "2018"

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "vectordb" name = "vectordb"
version = "0.4.7" version = "0.4.8"
edition = "2021" edition = "2021"
description = "LanceDB: A serverless, low-latency vector database for AI applications" description = "LanceDB: A serverless, low-latency vector database for AI applications"
license = "Apache-2.0" license = "Apache-2.0"