From 3ab4b335c3e0d08e5a2ba6244d93f16302e77a41 Mon Sep 17 00:00:00 2001 From: elliottRobinson <129797935+elliottRobinson@users.noreply.github.com> Date: Tue, 26 Dec 2023 05:54:22 -0800 Subject: [PATCH 01/43] Update default_embedding_functions.md (#744) Modify some grammar, punctuation, and spelling errors. --- .../embeddings/default_embedding_functions.md | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/docs/src/embeddings/default_embedding_functions.md b/docs/src/embeddings/default_embedding_functions.md index 3ee06b5a..432c951f 100644 --- a/docs/src/embeddings/default_embedding_functions.md +++ b/docs/src/embeddings/default_embedding_functions.md @@ -1,9 +1,9 @@ -There are various Embedding functions available out of the box with lancedb. We're working on supporting other popular embedding APIs. +There are various Embedding functions available out of the box with LanceDB. We're working on supporting other popular embedding APIs. ## Text Embedding Functions Here are the text embedding functions registered by default. -Embedding functions have inbuilt rate limit handler wrapper for source and query embedding function calls that retry with exponential standoff. -Each `EmbeddingFunction` implementation automatically takes `max_retries` as an argument which has the deafult value of 7. +Embedding functions have an inbuilt rate limit handler wrapper for source and query embedding function calls that retry with exponential standoff. +Each `EmbeddingFunction` implementation automatically takes `max_retries` as an argument which has the default value of 7. ### Sentence Transformers Here are the parameters that you can set when registering a `sentence-transformers` object, and their default values: @@ -69,15 +69,15 @@ print(actual.text) ``` ### Instructor Embeddings -Instructor is an instruction-finetuned text embedding model that can generate text embeddings tailored to any task (e.g., classification, retrieval, clustering, text evaluation, etc.) and domains (e.g., science, finance, etc.) by simply providing the task instruction, without any finetuning +Instructor is an instruction-finetuned text embedding model that can generate text embeddings tailored to any task (e.g. classification, retrieval, clustering, text evaluation, etc.) and domains (e.g. science, finance, etc.) by simply providing the task instruction, without any finetuning. If you want to calculate customized embeddings for specific sentences, you may follow the unified template to write instructions: Represent the `domain` `text_type` for `task_objective`: -* `domain` is optional, and it specifies the domain of the text, e.g., science, finance, medicine, etc. -* `text_type` is required, and it specifies the encoding unit, e.g., sentence, document, paragraph, etc. -* `task_objective` is optional, and it specifies the objective of embedding, e.g., retrieve a document, classify the sentence, etc. +* `domain` is optional, and it specifies the domain of the text, e.g. science, finance, medicine, etc. +* `text_type` is required, and it specifies the encoding unit, e.g. sentence, document, paragraph, etc. +* `task_objective` is optional, and it specifies the objective of embedding, e.g. retrieve a document, classify the sentence, etc. More information about the model can be found here - https://github.com/xlang-ai/instructor-embedding @@ -119,10 +119,10 @@ tbl.add(texts) ``` ## Multi-modal embedding functions -Multi-modal embedding functions allow you query your table using both images and text. 
+Multi-modal embedding functions allow you to query your table using both images and text. ### OpenClipEmbeddings -We support CLIP model embeddings using the open souce alternbative, open-clip which support various customizations. It is registered as `open-clip` and supports following customizations. +We support CLIP model embeddings using the open source alternative, open-clip which supports various customizations. It is registered as `open-clip` and supports the following customizations: | Parameter | Type | Default Value | Description | @@ -205,4 +205,4 @@ print(actual.label) ``` -If you have any questions about the embeddings API, supported models, or see a relevant model missing, please raise an issue. \ No newline at end of file +If you have any questions about the embeddings API, supported models, or see a relevant model missing, please raise an issue. From b56c54c990acc860dbfd5d1aadbd2fa146e66fff Mon Sep 17 00:00:00 2001 From: Lance Release Date: Tue, 26 Dec 2023 16:51:09 +0000 Subject: [PATCH 02/43] =?UTF-8?q?Bump=20version:=200.4.0=20=E2=86=92=200.4?= =?UTF-8?q?.1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .bumpversion.cfg | 2 +- node/package.json | 12 ++++++------ rust/ffi/node/Cargo.toml | 2 +- rust/vectordb/Cargo.toml | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index a004daf8..5257c203 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.4.0 +current_version = 0.4.1 commit = True message = Bump version: {current_version} → {new_version} tag = True diff --git a/node/package.json b/node/package.json index 9c732270..74a238e9 100644 --- a/node/package.json +++ b/node/package.json @@ -1,6 +1,6 @@ { "name": "vectordb", - "version": "0.4.0", + "version": "0.4.1", "description": " Serverless, low-latency vector database for AI applications", "main": "dist/index.js", "types": "dist/index.d.ts", @@ -81,10 +81,10 @@ } }, "optionalDependencies": { - "@lancedb/vectordb-darwin-arm64": "0.4.0", - "@lancedb/vectordb-darwin-x64": "0.4.0", - "@lancedb/vectordb-linux-arm64-gnu": "0.4.0", - "@lancedb/vectordb-linux-x64-gnu": "0.4.0", - "@lancedb/vectordb-win32-x64-msvc": "0.4.0" + "@lancedb/vectordb-darwin-arm64": "0.4.1", + "@lancedb/vectordb-darwin-x64": "0.4.1", + "@lancedb/vectordb-linux-arm64-gnu": "0.4.1", + "@lancedb/vectordb-linux-x64-gnu": "0.4.1", + "@lancedb/vectordb-win32-x64-msvc": "0.4.1" } } diff --git a/rust/ffi/node/Cargo.toml b/rust/ffi/node/Cargo.toml index 280db8f3..e2569bc3 100644 --- a/rust/ffi/node/Cargo.toml +++ b/rust/ffi/node/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "vectordb-node" -version = "0.4.0" +version = "0.4.1" description = "Serverless, low-latency vector database for AI applications" license = "Apache-2.0" edition = "2018" diff --git a/rust/vectordb/Cargo.toml b/rust/vectordb/Cargo.toml index a1cd136f..12bc7e45 100644 --- a/rust/vectordb/Cargo.toml +++ b/rust/vectordb/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "vectordb" -version = "0.4.0" +version = "0.4.1" edition = "2021" description = "LanceDB: A serverless, low-latency vector database for AI applications" license = "Apache-2.0" From d1f24ba1dd48b5bf8c3513e7a9590fb4bc907e6f Mon Sep 17 00:00:00 2001 From: Lance Release Date: Tue, 26 Dec 2023 16:51:16 +0000 Subject: [PATCH 03/43] =?UTF-8?q?[python]=20Bump=20version:=200.4.0=20?= =?UTF-8?q?=E2=86=92=200.4.1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 
8bit --- python/.bumpversion.cfg | 2 +- python/pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/.bumpversion.cfg b/python/.bumpversion.cfg index ad6392e5..a4c558bd 100644 --- a/python/.bumpversion.cfg +++ b/python/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.4.0 +current_version = 0.4.1 commit = True message = [python] Bump version: {current_version} → {new_version} tag = True diff --git a/python/pyproject.toml b/python/pyproject.toml index fac0051a..ccece508 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "lancedb" -version = "0.4.0" +version = "0.4.1" dependencies = [ "deprecation", "pylance==0.9.1", From 4891a7ae141960aeb1384835bcee536cb172658d Mon Sep 17 00:00:00 2001 From: Lance Release Date: Tue, 26 Dec 2023 17:21:51 +0000 Subject: [PATCH 04/43] Updating package-lock.json --- node/package-lock.json | 74 +++++++++++++++++++++--------------------- 1 file changed, 37 insertions(+), 37 deletions(-) diff --git a/node/package-lock.json b/node/package-lock.json index 82e33d39..56c8c28a 100644 --- a/node/package-lock.json +++ b/node/package-lock.json @@ -1,12 +1,12 @@ { "name": "vectordb", - "version": "0.4.0", + "version": "0.4.1", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "vectordb", - "version": "0.4.0", + "version": "0.4.1", "cpu": [ "x64", "arm64" @@ -53,11 +53,11 @@ "uuid": "^9.0.0" }, "optionalDependencies": { - "@lancedb/vectordb-darwin-arm64": "0.4.0", - "@lancedb/vectordb-darwin-x64": "0.4.0", - "@lancedb/vectordb-linux-arm64-gnu": "0.4.0", - "@lancedb/vectordb-linux-x64-gnu": "0.4.0", - "@lancedb/vectordb-win32-x64-msvc": "0.4.0" + "@lancedb/vectordb-darwin-arm64": "0.4.1", + "@lancedb/vectordb-darwin-x64": "0.4.1", + "@lancedb/vectordb-linux-arm64-gnu": "0.4.1", + "@lancedb/vectordb-linux-x64-gnu": "0.4.1", + "@lancedb/vectordb-win32-x64-msvc": "0.4.1" } }, "node_modules/@apache-arrow/ts": { @@ -317,9 +317,9 @@ } }, "node_modules/@lancedb/vectordb-darwin-arm64": { - "version": "0.4.0", - "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.4.0.tgz", - "integrity": "sha512-cP6zGtBWXEcJHCI4uLNIP5ILtRvexvwmL8Uri1dnHG8dT8g12Ykug3BHO6Wt6wp/xASd2jJRIF/VAJsN9IeP1A==", + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.4.1.tgz", + "integrity": "sha512-ul/Hvv5RX2RThpKSuiUjJRVrmXuBPvpU+HrLjcBmu4dzpuWN4+IeHIUM6xe79gLxOKlwkscVweTOuZnmMfsZeg==", "cpu": [ "arm64" ], @@ -329,9 +329,9 @@ ] }, "node_modules/@lancedb/vectordb-darwin-x64": { - "version": "0.4.0", - "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.4.0.tgz", - "integrity": "sha512-ig0gV5ol1sFe2lb1HOatK0rizyj9I91WbnH79i7OdUl3nAQIcWm70CnxrPLtx0DS2NTGh2kFJbYCWcaUlu6YfA==", + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.4.1.tgz", + "integrity": "sha512-sJtF2Cv6T9RhUpdeHNkryiJwPuW9QPQ3aMs5fID1hMCJA2U3BX27t/WlkiPT2+kTLeUcwF1JvAOgsfvZkfvI8w==", "cpu": [ "x64" ], @@ -341,9 +341,9 @@ ] }, "node_modules/@lancedb/vectordb-linux-arm64-gnu": { - "version": "0.4.0", - "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.4.0.tgz", - "integrity": "sha512-gMXIDT2kriAPDwWIRKXdaTCNdOeFGEok1S9Y30AOruHXddW1vCIo4JNJIYbBqHnwAeI4wI3ae6GRCFaf1UxO3g==", + "version": "0.4.1", + "resolved": 
"https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.4.1.tgz", + "integrity": "sha512-tNnziT0BRjPsznKI4GgWROFdCOsCGx0inFu0z+WV1UomwXKcMWGslpWBqKE8IUiCq14duPVx/ie7Wwcf51IeJQ==", "cpu": [ "arm64" ], @@ -353,9 +353,9 @@ ] }, "node_modules/@lancedb/vectordb-linux-x64-gnu": { - "version": "0.4.0", - "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.4.0.tgz", - "integrity": "sha512-ZQ3lDrDSz1IKdx/mS9Lz08agFO+OD5oSFrrcFNCoT1+H93eS1mCLdmCoEARu3jKbx0tMs38l5J9yXZ2QmJye3w==", + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.4.1.tgz", + "integrity": "sha512-PAcF2p1FUsC0AD+qkLfgE5+ZlQwlHe9eTP9dSsX43V/NGPDQ9+gBzaBTEDbvyHj1wl2Wft2NwOqB1HAFhilSDg==", "cpu": [ "x64" ], @@ -365,9 +365,9 @@ ] }, "node_modules/@lancedb/vectordb-win32-x64-msvc": { - "version": "0.4.0", - "resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.4.0.tgz", - "integrity": "sha512-toNcNwBRE1sdsSf5hr7W8QiqZ33csc/knVEek4CyvYkZHJGh4Z6WI+DJUIASo5wzUez4TX7qUPpRPL9HuaPMCg==", + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.4.1.tgz", + "integrity": "sha512-8mvThCppI/AfSPby6Y3t6xpCfbo8IY6JH5exO8fDGTwBFHOqgwR4Izb2K7FgXxkwUYcN4EfGSsk/6B1GpwMudg==", "cpu": [ "x64" ], @@ -4869,33 +4869,33 @@ } }, "@lancedb/vectordb-darwin-arm64": { - "version": "0.4.0", - "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.4.0.tgz", - "integrity": "sha512-cP6zGtBWXEcJHCI4uLNIP5ILtRvexvwmL8Uri1dnHG8dT8g12Ykug3BHO6Wt6wp/xASd2jJRIF/VAJsN9IeP1A==", + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.4.1.tgz", + "integrity": "sha512-ul/Hvv5RX2RThpKSuiUjJRVrmXuBPvpU+HrLjcBmu4dzpuWN4+IeHIUM6xe79gLxOKlwkscVweTOuZnmMfsZeg==", "optional": true }, "@lancedb/vectordb-darwin-x64": { - "version": "0.4.0", - "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.4.0.tgz", - "integrity": "sha512-ig0gV5ol1sFe2lb1HOatK0rizyj9I91WbnH79i7OdUl3nAQIcWm70CnxrPLtx0DS2NTGh2kFJbYCWcaUlu6YfA==", + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.4.1.tgz", + "integrity": "sha512-sJtF2Cv6T9RhUpdeHNkryiJwPuW9QPQ3aMs5fID1hMCJA2U3BX27t/WlkiPT2+kTLeUcwF1JvAOgsfvZkfvI8w==", "optional": true }, "@lancedb/vectordb-linux-arm64-gnu": { - "version": "0.4.0", - "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.4.0.tgz", - "integrity": "sha512-gMXIDT2kriAPDwWIRKXdaTCNdOeFGEok1S9Y30AOruHXddW1vCIo4JNJIYbBqHnwAeI4wI3ae6GRCFaf1UxO3g==", + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.4.1.tgz", + "integrity": "sha512-tNnziT0BRjPsznKI4GgWROFdCOsCGx0inFu0z+WV1UomwXKcMWGslpWBqKE8IUiCq14duPVx/ie7Wwcf51IeJQ==", "optional": true }, "@lancedb/vectordb-linux-x64-gnu": { - "version": "0.4.0", - "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.4.0.tgz", - "integrity": "sha512-ZQ3lDrDSz1IKdx/mS9Lz08agFO+OD5oSFrrcFNCoT1+H93eS1mCLdmCoEARu3jKbx0tMs38l5J9yXZ2QmJye3w==", + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.4.1.tgz", + "integrity": 
"sha512-PAcF2p1FUsC0AD+qkLfgE5+ZlQwlHe9eTP9dSsX43V/NGPDQ9+gBzaBTEDbvyHj1wl2Wft2NwOqB1HAFhilSDg==", "optional": true }, "@lancedb/vectordb-win32-x64-msvc": { - "version": "0.4.0", - "resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.4.0.tgz", - "integrity": "sha512-toNcNwBRE1sdsSf5hr7W8QiqZ33csc/knVEek4CyvYkZHJGh4Z6WI+DJUIASo5wzUez4TX7qUPpRPL9HuaPMCg==", + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.4.1.tgz", + "integrity": "sha512-8mvThCppI/AfSPby6Y3t6xpCfbo8IY6JH5exO8fDGTwBFHOqgwR4Izb2K7FgXxkwUYcN4EfGSsk/6B1GpwMudg==", "optional": true }, "@neon-rs/cli": { From 46bf5a1ed1a6d2b83b7716a0fba5d966807831fb Mon Sep 17 00:00:00 2001 From: Chang She <759245+changhiskhan@users.noreply.github.com> Date: Wed, 27 Dec 2023 09:10:09 -0800 Subject: [PATCH 05/43] feat(python): support list of list fields from pydantic schema (#747) For object detection, each row may correspond to an image and each image can have multiple bounding boxes of x-y coordinates. This means that a `bbox` field is potentially "list of list of float". This adds support in our pydantic-pyarrow conversion for nested lists. --- python/lancedb/pydantic.py | 3 +++ python/tests/test_pydantic.py | 14 +++++++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/python/lancedb/pydantic.py b/python/lancedb/pydantic.py index caa69405..537d60a0 100644 --- a/python/lancedb/pydantic.py +++ b/python/lancedb/pydantic.py @@ -164,6 +164,9 @@ def _py_type_to_arrow_type(py_type: Type[Any]) -> pa.DataType: return pa.date32() elif py_type == datetime: return pa.timestamp("us") + elif py_type.__origin__ in (list, tuple): + child = py_type.__args__[0] + return pa.list_(_py_type_to_arrow_type(child)) raise TypeError( f"Converting Pydantic type to Arrow Type: unsupported type {py_type}" ) diff --git a/python/tests/test_pydantic.py b/python/tests/test_pydantic.py index fa7e4849..e6739032 100644 --- a/python/tests/test_pydantic.py +++ b/python/tests/test_pydantic.py @@ -15,7 +15,7 @@ import json import sys from datetime import date, datetime -from typing import List, Optional +from typing import List, Optional, Tuple import pyarrow as pa import pydantic @@ -39,6 +39,8 @@ def test_pydantic_to_arrow(): s: str vec: list[float] li: List[int] + lili: List[List[float]] + litu: List[Tuple[float, float]] opt: Optional[str] = None st: StructModel dt: date @@ -50,6 +52,8 @@ def test_pydantic_to_arrow(): s="hello", vec=[1.0, 2.0, 3.0], li=[2, 3, 4], + lili=[[2.5, 1.5], [3.5, 4.5], [5.5, 6.5]], + litu=[(2.5, 1.5), (3.5, 4.5), (5.5, 6.5)], st=StructModel(a="a", b=1.0), dt=date.today(), dtt=datetime.now(), @@ -63,6 +67,8 @@ def test_pydantic_to_arrow(): pa.field("s", pa.utf8(), False), pa.field("vec", pa.list_(pa.float64()), False), pa.field("li", pa.list_(pa.int64()), False), + pa.field("lili", pa.list_(pa.list_(pa.float64())), False), + pa.field("litu", pa.list_(pa.list_(pa.float64())), False), pa.field("opt", pa.utf8(), True), pa.field( "st", @@ -88,6 +94,8 @@ def test_pydantic_to_arrow_py38(): s: str vec: List[float] li: List[int] + lili: List[List[float]] + litu: List[Tuple[float, float]] opt: Optional[str] = None st: StructModel dt: date @@ -99,6 +107,8 @@ def test_pydantic_to_arrow_py38(): s="hello", vec=[1.0, 2.0, 3.0], li=[2, 3, 4], + lili=[[2.5, 1.5], [3.5, 4.5], [5.5, 6.5]], + litu=[(2.5, 1.5), (3.5, 4.5), (5.5, 6.5)], st=StructModel(a="a", b=1.0), dt=date.today(), dtt=datetime.now(), @@ -112,6 +122,8 @@ def 
test_pydantic_to_arrow_py38(): pa.field("s", pa.utf8(), False), pa.field("vec", pa.list_(pa.float64()), False), pa.field("li", pa.list_(pa.int64()), False), + pa.field("lili", pa.list_(pa.list_(pa.float64())), False), + pa.field("litu", pa.list_(pa.list_(pa.float64())), False), pa.field("opt", pa.utf8(), True), pa.field( "st", From e74c203e6f132f9f48f33d7d02b24566fad490be Mon Sep 17 00:00:00 2001 From: Aidan <64613310+aidangomar@users.noreply.github.com> Date: Wed, 27 Dec 2023 12:25:13 -0500 Subject: [PATCH 06/43] fix: createIndex index cache size (#741) --- node/src/remote/index.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/node/src/remote/index.ts b/node/src/remote/index.ts index 7fdcefd8..9d014a02 100644 --- a/node/src/remote/index.ts +++ b/node/src/remote/index.ts @@ -267,7 +267,7 @@ export class RemoteTable implements Table { const column = indexParams.column ?? 'vector' const indexType = 'vector' // only vector index is supported for remote connections const metricType = indexParams.metric_type ?? 'L2' - const indexCacheSize = indexParams ?? null + const indexCacheSize = indexParams.index_cache_size ?? null const data = { column, From a0afa8478627c435186269497e5107ee37d41c84 Mon Sep 17 00:00:00 2001 From: Chang She <759245+changhiskhan@users.noreply.github.com> Date: Wed, 27 Dec 2023 09:31:04 -0800 Subject: [PATCH 07/43] feat(python): add post filtering for full text search (#739) Closes #721 fts will return results as a pyarrow table. Pyarrow tables has a `filter` method but it does not take sql filter strings (only pyarrow compute expressions). Instead, we do one of two things to support `tbl.search("keywords").where("foo=5").limit(10).to_arrow()`: Default path: If duckdb is available then use duckdb to execute the sql filter string on the pyarrow table. Backup path: Otherwise, write the pyarrow table to a lance dataset and then do `to_table(filter=)` Neither is ideal. Default path has two issues: 1. requires installing an extra library (duckdb) 2. duckdb mangles some fields (like fixed size list => list) Backup path incurs a latency penalty (~20ms on ssd) to write the resultset to disk. In the short term, once #676 is addressed, we can write the dataset to "memory://" instead of disk, this makes the post filter evaluate much quicker (ETA next week). In the longer term, we'd like to be able to evaluate the filter string on the pyarrow Table directly, one possibility being that we use Substrait to generate pyarrow compute expressions from sql string. 
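A minimal standalone sketch of the default duckdb post-filter path described above, assuming duckdb is installed; the table name and data here are illustrative only:

```python
import duckdb
import pyarrow as pa

# Stand-in for the arrow table of FTS hits (illustrative data).
output_tbl = pa.table(
    {"id": [1, 2, 1], "text": ["puppy a", "puppy b", "puppy c"], "score": [0.9, 0.5, 0.2]}
)

# duckdb can scan the in-scope pyarrow table by name, so the SQL filter string
# is evaluated directly against it without writing anything to disk.
filtered = duckdb.sql("SELECT * FROM output_tbl").filter("id = 1").to_arrow_table()
print(filtered.num_rows)  # 2
```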
Or if there's enough progress on pyarrow, it could support Substrait expressions directly (no ETA) --------- Co-authored-by: Will Jones --- docs/src/fts.md | 24 +++++++++++++++++++----- python/lancedb/query.py | 21 +++++++++++++++++++++ python/pyproject.toml | 2 +- python/tests/test_fts.py | 26 ++++++++++++++++++++++++++ 4 files changed, 67 insertions(+), 6 deletions(-) diff --git a/docs/src/fts.md b/docs/src/fts.md index 47f51346..78c20f6b 100644 --- a/docs/src/fts.md +++ b/docs/src/fts.md @@ -29,8 +29,9 @@ uri = "data/sample-lancedb" db = lancedb.connect(uri) table = db.create_table("my_table", - data=[{"vector": [3.1, 4.1], "text": "Frodo was a happy puppy"}, - {"vector": [5.9, 26.5], "text": "There are several kittens playing"}]) + data=[{"vector": [3.1, 4.1], "text": "Frodo was a happy puppy", "meta": "foo"}, + {"vector": [5.9, 26.5], "text": "Sam was a loyal puppy", "meta": "bar"}, + {"vector": [15.9, 6.5], "text": "There are several kittens playing"}]) ``` @@ -64,10 +65,23 @@ table.create_fts_index(["text1", "text2"]) Note that the search API call does not change - you can search over all indexed columns at once. +## Filtering + +Currently the LanceDB full text search feature supports *post-filtering*, meaning filters are +applied on top of the full text search results. This can be invoked via the familiar +`where` syntax: + +```python +table.search("puppy").limit(10).where("meta='foo'").to_list() +``` + ## Current limitations 1. Currently we do not yet support incremental writes. -If you add data after fts index creation, it won't be reflected -in search results until you do a full reindex. + If you add data after fts index creation, it won't be reflected + in search results until you do a full reindex. + +2. We currently only support local filesystem paths for the fts index. + This is a tantivy limitation. We've implemented an object store plugin + but there's no way in tantivy-py to specify to use it. -2. We currently only support local filesystem paths for the fts index. 
\ No newline at end of file diff --git a/python/lancedb/query.py b/python/lancedb/query.py index fe2dc86c..743602ad 100644 --- a/python/lancedb/query.py +++ b/python/lancedb/query.py @@ -488,6 +488,27 @@ class LanceFtsQueryBuilder(LanceQueryBuilder): scores = pa.array(scores) output_tbl = self._table.to_lance().take(row_ids, columns=self._columns) output_tbl = output_tbl.append_column("score", scores) + + if self._where is not None: + try: + # TODO would be great to have Substrait generate pyarrow compute expressions + # or conversely have pyarrow support SQL expressions using Substrait + import duckdb + + output_tbl = ( + duckdb.sql(f"SELECT * FROM output_tbl") + .filter(self._where) + .to_arrow_table() + ) + except ImportError: + import lance + import tempfile + + # TODO Use "memory://" instead once that's supported + with tempfile.TemporaryDirectory() as tmp: + ds = lance.write_dataset(output_tbl, tmp) + output_tbl = ds.to_table(filter=self._where) + return output_tbl diff --git a/python/pyproject.toml b/python/pyproject.toml index ccece508..e5c63177 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -46,7 +46,7 @@ classifiers = [ repository = "https://github.com/lancedb/lancedb" [project.optional-dependencies] -tests = ["pandas>=1.4", "pytest", "pytest-mock", "pytest-asyncio", "requests"] +tests = ["pandas>=1.4", "pytest", "pytest-mock", "pytest-asyncio", "requests", "duckdb"] dev = ["ruff", "pre-commit", "black"] docs = ["mkdocs", "mkdocs-jupyter", "mkdocs-material", "mkdocstrings[python]"] clip = ["torch", "pillow", "open-clip"] diff --git a/python/tests/test_fts.py b/python/tests/test_fts.py index 2a61f3ca..f09b44ef 100644 --- a/python/tests/test_fts.py +++ b/python/tests/test_fts.py @@ -12,6 +12,7 @@ # limitations under the License. import os import random +from unittest import mock import numpy as np import pandas as pd @@ -47,6 +48,7 @@ def table(tmp_path) -> ldb.table.LanceTable: data=pd.DataFrame( { "vector": vectors, + "id": [i % 2 for i in range(100)], "text": text, "text2": text, "nested": [{"text": t} for t in text], @@ -88,6 +90,7 @@ def test_create_index_from_table(tmp_path, table): [ { "vector": np.random.randn(128), + "id": 101, "text": "gorilla", "text2": "gorilla", "nested": {"text": "gorilla"}, @@ -121,3 +124,26 @@ def test_nested_schema(tmp_path, table): table.create_fts_index("nested.text") rs = table.search("puppy").limit(10).to_list() assert len(rs) == 10 + + +def test_search_index_with_filter(table): + table.create_fts_index("text") + orig_import = __import__ + + def import_mock(name, *args): + if name == "duckdb": + raise ImportError + return orig_import(name, *args) + + # no duckdb + with mock.patch("builtins.__import__", side_effect=import_mock): + rs = table.search("puppy").where("id=1").limit(10).to_list() + for r in rs: + assert r["id"] == 1 + + # yes duckdb + rs2 = table.search("puppy").where("id=1").limit(10).to_list() + for r in rs2: + assert r["id"] == 1 + + assert rs == rs2 From 7bac1131fbbf1bf0455f7a71a3b7772d8d66e3f2 Mon Sep 17 00:00:00 2001 From: Chang She <759245+changhiskhan@users.noreply.github.com> Date: Thu, 28 Dec 2023 11:02:56 -0800 Subject: [PATCH 08/43] feat: add timezone handling for datetime in pydantic (#578) If you add timezone information in the Field annotation for a datetime then that will now be passed to the pyarrow data type. I'm not sure how pyarrow enforces timezones, right now, it silently coerces to the timezone given in the column regardless of whether the input had the matching timezone or not. 
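For illustration, a minimal sketch of how the `tz` annotation flows into the Arrow schema, modeled on the test added in this patch; the final comment only restates the coercion point noted above:

```python
from datetime import datetime

import pyarrow as pa
from pydantic import BaseModel, Field

from lancedb.pydantic import pydantic_to_schema


class TestModel(BaseModel):
    # The "tz" extra is picked up during the pydantic -> arrow conversion.
    dt_with_tz: datetime = Field(json_schema_extra={"tz": "Asia/Shanghai"})


schema = pydantic_to_schema(TestModel)
assert schema.field("dt_with_tz").type == pa.timestamp("us", tz="Asia/Shanghai")
# At write time pyarrow then applies this column timezone to incoming values
# rather than validating them (see the note above).
```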
This is probably not the right behavior. Though we could just make it so the user has to make the pydantic model do the validation instead of doing that at the pyarrow conversion layer. --- docs/src/guides/tables.md | 80 ++++++++++++++++++++++++++++++++++- python/lancedb/pydantic.py | 20 +++++---- python/pyproject.toml | 2 +- python/tests/test_pydantic.py | 17 ++++++-- 4 files changed, 105 insertions(+), 14 deletions(-) diff --git a/docs/src/guides/tables.md b/docs/src/guides/tables.md index 05cd6c25..5808e49a 100644 --- a/docs/src/guides/tables.md +++ b/docs/src/guides/tables.md @@ -118,6 +118,84 @@ This guide will show how to create tables, insert data into them, and update the table = db.create_table(table_name, schema=Content) ``` + #### Nested schemas + + Sometimes your data model may contain nested objects. + For example, you may want to store the document string + and the document soure name as a nested Document object: + + ```python + class Document(BaseModel): + content: str + source: str + ``` + + This can be used as the type of a LanceDB table column: + + ```python + class NestedSchema(LanceModel): + id: str + vector: Vector(1536) + document: Document + + tbl = db.create_table("nested_table", schema=NestedSchema, mode="overwrite") + ``` + + This creates a struct column called "document" that has two subfields + called "content" and "source": + + ``` + In [28]: tbl.schema + Out[28]: + id: string not null + vector: fixed_size_list[1536] not null + child 0, item: float + document: struct not null + child 0, content: string not null + child 1, source: string not null + ``` + + #### Validators + + Note that neither pydantic nor pyarrow automatically validates that input data + is of the *correct* timezone, but this is easy to add as a custom field validator: + + ```python + from datetime import datetime + from zoneinfo import ZoneInfo + + from lancedb.pydantic import LanceModel + from pydantic import Field, field_validator, ValidationError, ValidationInfo + + tzname = "America/New_York" + tz = ZoneInfo(tzname) + + class TestModel(LanceModel): + dt_with_tz: datetime = Field(json_schema_extra={"tz": tzname}) + + @field_validator('dt_with_tz') + @classmethod + def tz_must_match(cls, dt: datetime) -> datetime: + assert dt.tzinfo == tz + return dt + + ok = TestModel(dt_with_tz=datetime.now(tz)) + + try: + TestModel(dt_with_tz=datetime.now(ZoneInfo("Asia/Shanghai"))) + assert 0 == 1, "this should raise ValidationError" + except ValidationError: + print("A ValidationError was raised.") + pass + ``` + + When you run this code it should print "A ValidationError was raised." + + #### Pydantic custom types + + LanceDB does NOT yet support converting pydantic custom types. If this is something you need, + please file a feature request on the [LanceDB Github repo](https://github.com/lancedb/lancedb/issues/new). + ### Using Iterators / Writing Large Datasets It is recommended to use itertators to add large datasets in batches when creating your table in one go. This does not create multiple versions of your dataset unlike manually adding batches using `table.add()` @@ -153,7 +231,7 @@ This guide will show how to create tables, insert data into them, and update the You can also use iterators of other types like Pandas dataframe or Pylists directly in the above example. ## Creating Empty Table - You can also create empty tables in python. Initialize it with schema and later ingest data into it. + You can create empty tables in python. Initialize it with schema and later ingest data into it. 
```python import lancedb diff --git a/python/lancedb/pydantic.py b/python/lancedb/pydantic.py index 537d60a0..48a67189 100644 --- a/python/lancedb/pydantic.py +++ b/python/lancedb/pydantic.py @@ -26,6 +26,7 @@ import numpy as np import pyarrow as pa import pydantic import semver +from pydantic.fields import FieldInfo from .embeddings import EmbeddingFunctionRegistry @@ -142,8 +143,8 @@ def Vector( return FixedSizeList -def _py_type_to_arrow_type(py_type: Type[Any]) -> pa.DataType: - """Convert Python Type to Arrow DataType. +def _py_type_to_arrow_type(py_type: Type[Any], field: FieldInfo) -> pa.DataType: + """Convert a field with native Python type to Arrow data type. Raises ------ @@ -163,12 +164,13 @@ def _py_type_to_arrow_type(py_type: Type[Any]) -> pa.DataType: elif py_type == date: return pa.date32() elif py_type == datetime: - return pa.timestamp("us") - elif py_type.__origin__ in (list, tuple): + tz = get_extras(field, "tz") + return pa.timestamp("us", tz=tz) + elif getattr(py_type, "__origin__", None) in (list, tuple): child = py_type.__args__[0] - return pa.list_(_py_type_to_arrow_type(child)) + return pa.list_(_py_type_to_arrow_type(child, field)) raise TypeError( - f"Converting Pydantic type to Arrow Type: unsupported type {py_type}" + f"Converting Pydantic type to Arrow Type: unsupported type {py_type}." ) @@ -197,10 +199,10 @@ def _pydantic_to_arrow_type(field: pydantic.fields.FieldInfo) -> pa.DataType: args = field.annotation.__args__ if origin == list: child = args[0] - return pa.list_(_py_type_to_arrow_type(child)) + return pa.list_(_py_type_to_arrow_type(child, field)) elif origin == Union: if len(args) == 2 and args[1] == type(None): - return _py_type_to_arrow_type(args[0]) + return _py_type_to_arrow_type(args[0], field) elif inspect.isclass(field.annotation): if issubclass(field.annotation, pydantic.BaseModel): # Struct @@ -208,7 +210,7 @@ def _pydantic_to_arrow_type(field: pydantic.fields.FieldInfo) -> pa.DataType: return pa.struct(fields) elif issubclass(field.annotation, FixedSizeListMixin): return pa.list_(field.annotation.value_arrow_type(), field.annotation.dim()) - return _py_type_to_arrow_type(field.annotation) + return _py_type_to_arrow_type(field.annotation, field) def is_nullable(field: pydantic.fields.FieldInfo) -> bool: diff --git a/python/pyproject.toml b/python/pyproject.toml index e5c63177..b56e43c7 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -46,7 +46,7 @@ classifiers = [ repository = "https://github.com/lancedb/lancedb" [project.optional-dependencies] -tests = ["pandas>=1.4", "pytest", "pytest-mock", "pytest-asyncio", "requests", "duckdb"] +tests = ["pandas>=1.4", "pytest", "pytest-mock", "pytest-asyncio", "requests", "duckdb", "pytz"] dev = ["ruff", "pre-commit", "black"] docs = ["mkdocs", "mkdocs-jupyter", "mkdocs-material", "mkdocstrings[python]"] clip = ["torch", "pillow", "open-clip"] diff --git a/python/tests/test_pydantic.py b/python/tests/test_pydantic.py index e6739032..8a3ee16b 100644 --- a/python/tests/test_pydantic.py +++ b/python/tests/test_pydantic.py @@ -13,6 +13,7 @@ import json +import pytz import sys from datetime import date, datetime from typing import List, Optional, Tuple @@ -38,13 +39,14 @@ def test_pydantic_to_arrow(): id: int s: str vec: list[float] - li: List[int] - lili: List[List[float]] - litu: List[Tuple[float, float]] + li: list[int] + lili: list[list[float]] + litu: list[tuple[float, float]] opt: Optional[str] = None st: StructModel dt: date dtt: datetime + dt_with_tz: datetime = 
Field(json_schema_extra={"tz": "Asia/Shanghai"}) # d: dict m = TestModel( @@ -57,6 +59,7 @@ def test_pydantic_to_arrow(): st=StructModel(a="a", b=1.0), dt=date.today(), dtt=datetime.now(), + dt_with_tz=datetime.now(pytz.timezone("Asia/Shanghai")), ) schema = pydantic_to_schema(TestModel) @@ -79,11 +82,16 @@ def test_pydantic_to_arrow(): ), pa.field("dt", pa.date32(), False), pa.field("dtt", pa.timestamp("us"), False), + pa.field("dt_with_tz", pa.timestamp("us", tz="Asia/Shanghai"), False), ] ) assert schema == expect_schema +@pytest.mark.skipif( + sys.version_info > (3, 8), + reason="using native type alias requires python3.9 or higher", +) def test_pydantic_to_arrow_py38(): class StructModel(pydantic.BaseModel): a: str @@ -100,6 +108,7 @@ def test_pydantic_to_arrow_py38(): st: StructModel dt: date dtt: datetime + dt_with_tz: datetime = Field(json_schema_extra={"tz": "Asia/Shanghai"}) # d: dict m = TestModel( @@ -112,6 +121,7 @@ def test_pydantic_to_arrow_py38(): st=StructModel(a="a", b=1.0), dt=date.today(), dtt=datetime.now(), + dt_with_tz=datetime.now(pytz.timezone("Asia/Shanghai")), ) schema = pydantic_to_schema(TestModel) @@ -134,6 +144,7 @@ def test_pydantic_to_arrow_py38(): ), pa.field("dt", pa.date32(), False), pa.field("dtt", pa.timestamp("us"), False), + pa.field("dt_with_tz", pa.timestamp("us", tz="Asia/Shanghai"), False), ] ) assert schema == expect_schema From c97ae6b7873884fd3eecb712d002e7a2a562bd60 Mon Sep 17 00:00:00 2001 From: Chang She <759245+changhiskhan@users.noreply.github.com> Date: Thu, 28 Dec 2023 15:05:57 -0800 Subject: [PATCH 09/43] chore(python): update embedding API to use openai 1.6.1 (#751) API has changed significantly, namely `openai.Embedding.create` no longer exists. https://github.com/openai/openai-python/discussions/742 Update the OpenAI embedding function and put a minimum on the openai sdk version. --- python/lancedb/embeddings/openai.py | 9 +++++++-- python/lancedb/embeddings/utils.py | 2 +- python/pyproject.toml | 2 +- python/tests/test_embeddings_slow.py | 2 +- 4 files changed, 10 insertions(+), 5 deletions(-) diff --git a/python/lancedb/embeddings/openai.py b/python/lancedb/embeddings/openai.py index 406ed40f..678fe417 100644 --- a/python/lancedb/embeddings/openai.py +++ b/python/lancedb/embeddings/openai.py @@ -10,6 +10,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from functools import cached_property from typing import List, Union import numpy as np @@ -44,6 +45,10 @@ class OpenAIEmbeddings(TextEmbeddingFunction): The texts to embed """ # TODO retry, rate limit, token limit + rs = self._openai_client.embeddings.create(input=texts, model=self.name) + return [v.embedding for v in rs.data] + + @cached_property + def _openai_client(self): openai = self.safe_import("openai") - rs = openai.Embedding.create(input=texts, model=self.name)["data"] - return [v["embedding"] for v in rs] + return openai.OpenAI() diff --git a/python/lancedb/embeddings/utils.py b/python/lancedb/embeddings/utils.py index 59ed0460..8f893142 100644 --- a/python/lancedb/embeddings/utils.py +++ b/python/lancedb/embeddings/utils.py @@ -249,7 +249,7 @@ def retry_with_exponential_backoff( if num_retries > max_retries: raise Exception( - f"Maximum number of retries ({max_retries}) exceeded." 
+ f"Maximum number of retries ({max_retries}) exceeded.", e ) delay *= exponential_base * (1 + jitter * random.random()) diff --git a/python/pyproject.toml b/python/pyproject.toml index b56e43c7..c5c0cc94 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -50,7 +50,7 @@ tests = ["pandas>=1.4", "pytest", "pytest-mock", "pytest-asyncio", "requests", " dev = ["ruff", "pre-commit", "black"] docs = ["mkdocs", "mkdocs-jupyter", "mkdocs-material", "mkdocstrings[python]"] clip = ["torch", "pillow", "open-clip"] -embeddings = ["openai", "sentence-transformers", "torch", "pillow", "open-clip-torch", "cohere", "InstructorEmbedding"] +embeddings = ["openai>=1.6.1", "sentence-transformers", "torch", "pillow", "open-clip-torch", "cohere", "InstructorEmbedding"] [build-system] requires = ["setuptools", "wheel"] diff --git a/python/tests/test_embeddings_slow.py b/python/tests/test_embeddings_slow.py index 2e116827..826934f9 100644 --- a/python/tests/test_embeddings_slow.py +++ b/python/tests/test_embeddings_slow.py @@ -29,7 +29,7 @@ from lancedb.pydantic import LanceModel, Vector @pytest.mark.slow @pytest.mark.parametrize("alias", ["sentence-transformers", "openai"]) -def test_sentence_transformer(alias, tmp_path): +def test_basic_text_embeddings(alias, tmp_path): db = lancedb.connect(tmp_path) registry = get_registry() func = registry.get(alias).create(max_retries=0) From 7778031b26423fe5375d55d9e523af55fda82bbe Mon Sep 17 00:00:00 2001 From: Lance Release Date: Fri, 29 Dec 2023 00:19:21 +0000 Subject: [PATCH 10/43] =?UTF-8?q?[python]=20Bump=20version:=200.4.1=20?= =?UTF-8?q?=E2=86=92=200.4.2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- python/.bumpversion.cfg | 2 +- python/pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/.bumpversion.cfg b/python/.bumpversion.cfg index a4c558bd..f224639d 100644 --- a/python/.bumpversion.cfg +++ b/python/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.4.1 +current_version = 0.4.2 commit = True message = [python] Bump version: {current_version} → {new_version} tag = True diff --git a/python/pyproject.toml b/python/pyproject.toml index c5c0cc94..b8481237 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "lancedb" -version = "0.4.1" +version = "0.4.2" dependencies = [ "deprecation", "pylance==0.9.1", From 98af0ceec6206476769ad50e4c223235f1d2455a Mon Sep 17 00:00:00 2001 From: Chang She <759245+changhiskhan@users.noreply.github.com> Date: Fri, 29 Dec 2023 15:33:03 -0800 Subject: [PATCH 11/43] feat(python): first cut batch queries for remote api (#753) issue separate requests under the hood and concatenate results --- python/lancedb/query.py | 4 +++- python/lancedb/remote/table.py | 29 +++++++++++++++++++++++++++-- 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/python/lancedb/query.py b/python/lancedb/query.py index 743602ad..3bdc763b 100644 --- a/python/lancedb/query.py +++ b/python/lancedb/query.py @@ -70,7 +70,7 @@ class Query(pydantic.BaseModel): vector_column: str = VECTOR_COLUMN_NAME # vector to search for - vector: List[float] + vector: Union[List[float], List[List[float]]] # sql filter to refine the query with filter: Optional[str] = None @@ -421,6 +421,8 @@ class LanceVectorQueryBuilder(LanceQueryBuilder): vector and the returned vectors. 
""" vector = self._query if isinstance(self._query, list) else self._query.tolist() + if isinstance(vector[0], np.ndarray): + vector = [v.tolist() for v in vector] query = Query( vector=vector, filter=self._where, diff --git a/python/lancedb/remote/table.py b/python/lancedb/remote/table.py index 158728fb..e09011a7 100644 --- a/python/lancedb/remote/table.py +++ b/python/lancedb/remote/table.py @@ -11,6 +11,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import asyncio import uuid from functools import cached_property from typing import Dict, Optional, Union @@ -227,8 +228,24 @@ class RemoteTable(Table): return LanceVectorQueryBuilder(self, query, vector_column_name) def _execute_query(self, query: Query) -> pa.Table: - result = self._conn._client.query(self._name, query) - return self._conn._loop.run_until_complete(result).to_arrow() + if ( + query.vector is not None + and len(query.vector) > 0 + and not isinstance(query.vector[0], float) + ): + futures = [] + for v in query.vector: + v = list(v) + q = query.copy() + q.vector = v + futures.append(self._conn._client.query(self._name, q)) + result = self._conn._loop.run_until_complete(asyncio.gather(*futures)) + return pa.concat_tables( + [add_index(r.to_arrow(), i) for i, r in enumerate(result)] + ) + else: + result = self._conn._client.query(self._name, query) + return self._conn._loop.run_until_complete(result).to_arrow() def delete(self, predicate: str): """Delete rows from the table. @@ -342,3 +359,11 @@ class RemoteTable(Table): self._conn._loop.run_until_complete( self._conn._client.post(f"/v1/table/{self._name}/update/", data=payload) ) + + +def add_index(tbl: pa.Table, i: int) -> pa.Table: + return tbl.add_column( + 0, + pa.field("query_index", pa.uint32()), + pa.array([i] * len(tbl), pa.uint32()), + ) From a63262cfda2124a11c3c96a201a153ae5ee8ec69 Mon Sep 17 00:00:00 2001 From: Xin Hao Date: Sat, 30 Dec 2023 07:33:24 +0800 Subject: [PATCH 12/43] docs: fix link (#752) --- docs/src/python/pydantic.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/python/pydantic.md b/docs/src/python/pydantic.md index cdb28df2..b677c010 100644 --- a/docs/src/python/pydantic.md +++ b/docs/src/python/pydantic.md @@ -7,7 +7,7 @@ LanceDB integrates with Pydantic for schema inference, data ingestion, and query LanceDB supports to create Apache Arrow Schema from a [Pydantic BaseModel](https://docs.pydantic.dev/latest/api/main/#pydantic.main.BaseModel) -via [pydantic_to_schema()](python.md##lancedb.pydantic.pydantic_to_schema) method. +via [pydantic_to_schema()](python.md#lancedb.pydantic.pydantic_to_schema) method. 
::: lancedb.pydantic.pydantic_to_schema From 56db257ea97a33c58b5070c4b4db39d30a1b439c Mon Sep 17 00:00:00 2001 From: Lei Xu Date: Fri, 29 Dec 2023 16:39:45 -0800 Subject: [PATCH 13/43] chore: bump pylance to 0.9.2 (#754) --- Cargo.toml | 8 ++++---- python/pyproject.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index fb331eac..f5c73a96 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,10 +5,10 @@ exclude = ["python"] resolver = "2" [workspace.dependencies] -lance = { "version" = "=0.9.1", "features" = ["dynamodb"] } -lance-index = { "version" = "=0.9.1" } -lance-linalg = { "version" = "=0.9.1" } -lance-testing = { "version" = "=0.9.1" } +lance = { "version" = "=0.9.2", "features" = ["dynamodb"] } +lance-index = { "version" = "=0.9.2" } +lance-linalg = { "version" = "=0.9.2" } +lance-testing = { "version" = "=0.9.2" } # Note that this one does not include pyarrow arrow = { version = "49.0.0", optional = false } arrow-array = "49.0" diff --git a/python/pyproject.toml b/python/pyproject.toml index b8481237..e9a676dc 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -3,7 +3,7 @@ name = "lancedb" version = "0.4.2" dependencies = [ "deprecation", - "pylance==0.9.1", + "pylance==0.9.2", "ratelimiter~=1.0", "retry>=0.9.2", "tqdm>=4.27.0", From 918a2a4405607bd789f9b7932dc9e08eae0235db Mon Sep 17 00:00:00 2001 From: Lance Release Date: Sat, 30 Dec 2023 00:52:54 +0000 Subject: [PATCH 14/43] =?UTF-8?q?[python]=20Bump=20version:=200.4.2=20?= =?UTF-8?q?=E2=86=92=200.4.3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- python/.bumpversion.cfg | 2 +- python/pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/.bumpversion.cfg b/python/.bumpversion.cfg index f224639d..c69f92cd 100644 --- a/python/.bumpversion.cfg +++ b/python/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.4.2 +current_version = 0.4.3 commit = True message = [python] Bump version: {current_version} → {new_version} tag = True diff --git a/python/pyproject.toml b/python/pyproject.toml index e9a676dc..6580d804 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "lancedb" -version = "0.4.2" +version = "0.4.3" dependencies = [ "deprecation", "pylance==0.9.2", From c629080d6029313ebd2231d805304576172c00fa Mon Sep 17 00:00:00 2001 From: Lance Release Date: Sat, 30 Dec 2023 00:53:30 +0000 Subject: [PATCH 15/43] =?UTF-8?q?Bump=20version:=200.4.1=20=E2=86=92=200.4?= =?UTF-8?q?.2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .bumpversion.cfg | 2 +- node/package.json | 12 ++++++------ rust/ffi/node/Cargo.toml | 2 +- rust/vectordb/Cargo.toml | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 5257c203..a4d2a718 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.4.1 +current_version = 0.4.2 commit = True message = Bump version: {current_version} → {new_version} tag = True diff --git a/node/package.json b/node/package.json index 74a238e9..fb15797d 100644 --- a/node/package.json +++ b/node/package.json @@ -1,6 +1,6 @@ { "name": "vectordb", - "version": "0.4.1", + "version": "0.4.2", "description": " Serverless, low-latency vector database for AI applications", "main": "dist/index.js", "types": "dist/index.d.ts", @@ -81,10 +81,10 @@ } }, "optionalDependencies": { - 
"@lancedb/vectordb-darwin-arm64": "0.4.1", - "@lancedb/vectordb-darwin-x64": "0.4.1", - "@lancedb/vectordb-linux-arm64-gnu": "0.4.1", - "@lancedb/vectordb-linux-x64-gnu": "0.4.1", - "@lancedb/vectordb-win32-x64-msvc": "0.4.1" + "@lancedb/vectordb-darwin-arm64": "0.4.2", + "@lancedb/vectordb-darwin-x64": "0.4.2", + "@lancedb/vectordb-linux-arm64-gnu": "0.4.2", + "@lancedb/vectordb-linux-x64-gnu": "0.4.2", + "@lancedb/vectordb-win32-x64-msvc": "0.4.2" } } diff --git a/rust/ffi/node/Cargo.toml b/rust/ffi/node/Cargo.toml index e2569bc3..ce961a2d 100644 --- a/rust/ffi/node/Cargo.toml +++ b/rust/ffi/node/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "vectordb-node" -version = "0.4.1" +version = "0.4.2" description = "Serverless, low-latency vector database for AI applications" license = "Apache-2.0" edition = "2018" diff --git a/rust/vectordb/Cargo.toml b/rust/vectordb/Cargo.toml index 12bc7e45..f1bfe216 100644 --- a/rust/vectordb/Cargo.toml +++ b/rust/vectordb/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "vectordb" -version = "0.4.1" +version = "0.4.2" edition = "2021" description = "LanceDB: A serverless, low-latency vector database for AI applications" license = "Apache-2.0" From 0d2dbf7d095097f7958f525ee5d17e19053683bc Mon Sep 17 00:00:00 2001 From: Lance Release Date: Sat, 30 Dec 2023 00:53:51 +0000 Subject: [PATCH 16/43] Updating package-lock.json --- node/package-lock.json | 104 +++-------------------------------------- 1 file changed, 7 insertions(+), 97 deletions(-) diff --git a/node/package-lock.json b/node/package-lock.json index 56c8c28a..aca4d7da 100644 --- a/node/package-lock.json +++ b/node/package-lock.json @@ -1,12 +1,12 @@ { "name": "vectordb", - "version": "0.4.1", + "version": "0.4.2", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "vectordb", - "version": "0.4.1", + "version": "0.4.2", "cpu": [ "x64", "arm64" @@ -53,11 +53,11 @@ "uuid": "^9.0.0" }, "optionalDependencies": { - "@lancedb/vectordb-darwin-arm64": "0.4.1", - "@lancedb/vectordb-darwin-x64": "0.4.1", - "@lancedb/vectordb-linux-arm64-gnu": "0.4.1", - "@lancedb/vectordb-linux-x64-gnu": "0.4.1", - "@lancedb/vectordb-win32-x64-msvc": "0.4.1" + "@lancedb/vectordb-darwin-arm64": "0.4.2", + "@lancedb/vectordb-darwin-x64": "0.4.2", + "@lancedb/vectordb-linux-arm64-gnu": "0.4.2", + "@lancedb/vectordb-linux-x64-gnu": "0.4.2", + "@lancedb/vectordb-win32-x64-msvc": "0.4.2" } }, "node_modules/@apache-arrow/ts": { @@ -316,66 +316,6 @@ "@jridgewell/sourcemap-codec": "^1.4.10" } }, - "node_modules/@lancedb/vectordb-darwin-arm64": { - "version": "0.4.1", - "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.4.1.tgz", - "integrity": "sha512-ul/Hvv5RX2RThpKSuiUjJRVrmXuBPvpU+HrLjcBmu4dzpuWN4+IeHIUM6xe79gLxOKlwkscVweTOuZnmMfsZeg==", - "cpu": [ - "arm64" - ], - "optional": true, - "os": [ - "darwin" - ] - }, - "node_modules/@lancedb/vectordb-darwin-x64": { - "version": "0.4.1", - "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.4.1.tgz", - "integrity": "sha512-sJtF2Cv6T9RhUpdeHNkryiJwPuW9QPQ3aMs5fID1hMCJA2U3BX27t/WlkiPT2+kTLeUcwF1JvAOgsfvZkfvI8w==", - "cpu": [ - "x64" - ], - "optional": true, - "os": [ - "darwin" - ] - }, - "node_modules/@lancedb/vectordb-linux-arm64-gnu": { - "version": "0.4.1", - "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.4.1.tgz", - "integrity": "sha512-tNnziT0BRjPsznKI4GgWROFdCOsCGx0inFu0z+WV1UomwXKcMWGslpWBqKE8IUiCq14duPVx/ie7Wwcf51IeJQ==", - "cpu": [ 
- "arm64" - ], - "optional": true, - "os": [ - "linux" - ] - }, - "node_modules/@lancedb/vectordb-linux-x64-gnu": { - "version": "0.4.1", - "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.4.1.tgz", - "integrity": "sha512-PAcF2p1FUsC0AD+qkLfgE5+ZlQwlHe9eTP9dSsX43V/NGPDQ9+gBzaBTEDbvyHj1wl2Wft2NwOqB1HAFhilSDg==", - "cpu": [ - "x64" - ], - "optional": true, - "os": [ - "linux" - ] - }, - "node_modules/@lancedb/vectordb-win32-x64-msvc": { - "version": "0.4.1", - "resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.4.1.tgz", - "integrity": "sha512-8mvThCppI/AfSPby6Y3t6xpCfbo8IY6JH5exO8fDGTwBFHOqgwR4Izb2K7FgXxkwUYcN4EfGSsk/6B1GpwMudg==", - "cpu": [ - "x64" - ], - "optional": true, - "os": [ - "win32" - ] - }, "node_modules/@neon-rs/cli": { "version": "0.0.160", "resolved": "https://registry.npmjs.org/@neon-rs/cli/-/cli-0.0.160.tgz", @@ -4868,36 +4808,6 @@ "@jridgewell/sourcemap-codec": "^1.4.10" } }, - "@lancedb/vectordb-darwin-arm64": { - "version": "0.4.1", - "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.4.1.tgz", - "integrity": "sha512-ul/Hvv5RX2RThpKSuiUjJRVrmXuBPvpU+HrLjcBmu4dzpuWN4+IeHIUM6xe79gLxOKlwkscVweTOuZnmMfsZeg==", - "optional": true - }, - "@lancedb/vectordb-darwin-x64": { - "version": "0.4.1", - "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.4.1.tgz", - "integrity": "sha512-sJtF2Cv6T9RhUpdeHNkryiJwPuW9QPQ3aMs5fID1hMCJA2U3BX27t/WlkiPT2+kTLeUcwF1JvAOgsfvZkfvI8w==", - "optional": true - }, - "@lancedb/vectordb-linux-arm64-gnu": { - "version": "0.4.1", - "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.4.1.tgz", - "integrity": "sha512-tNnziT0BRjPsznKI4GgWROFdCOsCGx0inFu0z+WV1UomwXKcMWGslpWBqKE8IUiCq14duPVx/ie7Wwcf51IeJQ==", - "optional": true - }, - "@lancedb/vectordb-linux-x64-gnu": { - "version": "0.4.1", - "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.4.1.tgz", - "integrity": "sha512-PAcF2p1FUsC0AD+qkLfgE5+ZlQwlHe9eTP9dSsX43V/NGPDQ9+gBzaBTEDbvyHj1wl2Wft2NwOqB1HAFhilSDg==", - "optional": true - }, - "@lancedb/vectordb-win32-x64-msvc": { - "version": "0.4.1", - "resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.4.1.tgz", - "integrity": "sha512-8mvThCppI/AfSPby6Y3t6xpCfbo8IY6JH5exO8fDGTwBFHOqgwR4Izb2K7FgXxkwUYcN4EfGSsk/6B1GpwMudg==", - "optional": true - }, "@neon-rs/cli": { "version": "0.0.160", "resolved": "https://registry.npmjs.org/@neon-rs/cli/-/cli-0.0.160.tgz", From 24afea8c56ede37c4d9dbe088c5236d5bd3660de Mon Sep 17 00:00:00 2001 From: Lance Release Date: Sat, 30 Dec 2023 03:16:41 +0000 Subject: [PATCH 17/43] Updating package-lock.json --- node/package-lock.json | 90 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) diff --git a/node/package-lock.json b/node/package-lock.json index aca4d7da..541662fb 100644 --- a/node/package-lock.json +++ b/node/package-lock.json @@ -316,6 +316,66 @@ "@jridgewell/sourcemap-codec": "^1.4.10" } }, + "node_modules/@lancedb/vectordb-darwin-arm64": { + "version": "0.4.2", + "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.4.2.tgz", + "integrity": "sha512-Ec73W2IHnZK4VC8g/7JyLbgcwcpNb9YI20yEhfTjEEFjJKoElZhDD/ZgghC3QQSRnrXFTxDzPK1V9BDT5QB2Hg==", + "cpu": [ + "arm64" + ], + "optional": true, + "os": [ + "darwin" + ] + }, + 
"node_modules/@lancedb/vectordb-darwin-x64": { + "version": "0.4.2", + "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.4.2.tgz", + "integrity": "sha512-tj0JJlOfOdeSAfmM7EZhrhFdCFjoq9Bmrjt4741BNjtF+Nv4Otl53lFtUQrexTr4oh/E1yY1qaydJ7K++8u3UA==", + "cpu": [ + "x64" + ], + "optional": true, + "os": [ + "darwin" + ] + }, + "node_modules/@lancedb/vectordb-linux-arm64-gnu": { + "version": "0.4.2", + "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.4.2.tgz", + "integrity": "sha512-OQ7ra5Q5RrLLwxIyI338KfQ2sSl8NJfqAHWvwiMtjCYFFYxIJGjX7U0I2MjSEPqJ5/ZoyjV4mjsvs0G1q20u+Q==", + "cpu": [ + "arm64" + ], + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@lancedb/vectordb-linux-x64-gnu": { + "version": "0.4.2", + "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.4.2.tgz", + "integrity": "sha512-9tgIFSOYqNJzonnYsQr7v2gGdJm8aZ62UsVX2SWAIVhypoP4A05tAlbzjBgKO3R5xy5gpcW8tt/Pt8IsYWON7Q==", + "cpu": [ + "x64" + ], + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@lancedb/vectordb-win32-x64-msvc": { + "version": "0.4.2", + "resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.4.2.tgz", + "integrity": "sha512-jhG3MqZ3r8BexXANLRNX57RAnCZT9psdSBORG3KTu5qe2xaunRlJNSA2kk8a79tf+gtUT/BAmMiXMzAi/dwq8w==", + "cpu": [ + "x64" + ], + "optional": true, + "os": [ + "win32" + ] + }, "node_modules/@neon-rs/cli": { "version": "0.0.160", "resolved": "https://registry.npmjs.org/@neon-rs/cli/-/cli-0.0.160.tgz", @@ -4808,6 +4868,36 @@ "@jridgewell/sourcemap-codec": "^1.4.10" } }, + "@lancedb/vectordb-darwin-arm64": { + "version": "0.4.2", + "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.4.2.tgz", + "integrity": "sha512-Ec73W2IHnZK4VC8g/7JyLbgcwcpNb9YI20yEhfTjEEFjJKoElZhDD/ZgghC3QQSRnrXFTxDzPK1V9BDT5QB2Hg==", + "optional": true + }, + "@lancedb/vectordb-darwin-x64": { + "version": "0.4.2", + "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.4.2.tgz", + "integrity": "sha512-tj0JJlOfOdeSAfmM7EZhrhFdCFjoq9Bmrjt4741BNjtF+Nv4Otl53lFtUQrexTr4oh/E1yY1qaydJ7K++8u3UA==", + "optional": true + }, + "@lancedb/vectordb-linux-arm64-gnu": { + "version": "0.4.2", + "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.4.2.tgz", + "integrity": "sha512-OQ7ra5Q5RrLLwxIyI338KfQ2sSl8NJfqAHWvwiMtjCYFFYxIJGjX7U0I2MjSEPqJ5/ZoyjV4mjsvs0G1q20u+Q==", + "optional": true + }, + "@lancedb/vectordb-linux-x64-gnu": { + "version": "0.4.2", + "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.4.2.tgz", + "integrity": "sha512-9tgIFSOYqNJzonnYsQr7v2gGdJm8aZ62UsVX2SWAIVhypoP4A05tAlbzjBgKO3R5xy5gpcW8tt/Pt8IsYWON7Q==", + "optional": true + }, + "@lancedb/vectordb-win32-x64-msvc": { + "version": "0.4.2", + "resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.4.2.tgz", + "integrity": "sha512-jhG3MqZ3r8BexXANLRNX57RAnCZT9psdSBORG3KTu5qe2xaunRlJNSA2kk8a79tf+gtUT/BAmMiXMzAi/dwq8w==", + "optional": true + }, "@neon-rs/cli": { "version": "0.0.160", "resolved": "https://registry.npmjs.org/@neon-rs/cli/-/cli-0.0.160.tgz", From cd791a366b3ab6bf06ac969aa0735dc50f030c94 Mon Sep 17 00:00:00 2001 From: Chang She <759245+changhiskhan@users.noreply.github.com> Date: Tue, 2 Jan 2024 20:55:33 -0800 Subject: [PATCH 18/43] feat(js): 
support list of string input (#755) Add support for adding lists of string input (e.g., list of categorical labels) Follow-up items: #757 #758 --- node/src/arrow.ts | 28 +++++++++++++++++++++++++++- node/src/test/test.ts | 19 +++++++++++++++++++ 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/node/src/arrow.ts b/node/src/arrow.ts index 5421654a..90e59d31 100644 --- a/node/src/arrow.ts +++ b/node/src/arrow.ts @@ -20,7 +20,7 @@ import { Utf8, type Vector, FixedSizeList, - vectorFromArray, type Schema, Table as ArrowTable, RecordBatchStreamWriter + vectorFromArray, type Schema, Table as ArrowTable, RecordBatchStreamWriter, List, Float64 } from 'apache-arrow' import { type EmbeddingFunction } from './index' @@ -59,6 +59,24 @@ export async function convertToTable (data: Array>, e if (typeof values[0] === 'string') { // `vectorFromArray` converts strings into dictionary vectors, forcing it back to a string column records[columnsKey] = vectorFromArray(values, new Utf8()) + } else if (Array.isArray(values[0])) { + const elementType = getElementType(values[0]) + let innerType + if (elementType === 'string') { + innerType = new Utf8() + } else if (elementType === 'number') { + innerType = new Float64() + } else { + // TODO: pass in schema if it exists, else keep going to the next element + throw new Error(`Unsupported array element type ${elementType}`) + } + const listBuilder = makeBuilder({ + type: new List(new Field('item', innerType, true)) + }) + for (const value of values) { + listBuilder.append(value) + } + records[columnsKey] = listBuilder.finish().toVector() } else { records[columnsKey] = vectorFromArray(values) } @@ -68,6 +86,14 @@ export async function convertToTable (data: Array>, e return new ArrowTable(records) } +function getElementType (arr: any[]): string { + if (arr.length === 0) { + return 'undefined' + } + + return typeof arr[0] +} + // Creates a new Arrow ListBuilder that stores a Vector column function newVectorBuilder (dim: number): FixedSizeListBuilder { return makeBuilder({ diff --git a/node/src/test/test.ts b/node/src/test/test.ts index cb3fc6f1..e86d9613 100644 --- a/node/src/test/test.ts +++ b/node/src/test/test.ts @@ -218,6 +218,25 @@ describe('LanceDB client', function () { assert.equal(await table.countRows(), 2) }) + it('creates a new table from javascript objects with variable sized list', async function () { + const dir = await track().mkdir('lancejs') + const con = await lancedb.connect(dir) + + const data = [ + { id: 1, vector: [0.1, 0.2], list_of_str: ['a', 'b', 'c'], list_of_num: [1, 2, 3] }, + { id: 2, vector: [1.1, 1.2], list_of_str: ['x', 'y'], list_of_num: [4, 5, 6] } + ] + + const tableName = 'with_variable_sized_list' + const table = await con.createTable(tableName, data) as LocalTable + assert.equal(table.name, tableName) + assert.equal(await table.countRows(), 2) + const rs = await table.filter('id>1').execute() + assert.equal(rs.length, 1) + assert.deepEqual(rs[0].list_of_str, ['x', 'y']) + assert.isTrue(rs[0].list_of_num instanceof Float64Array) + }) + it('fails to create a new table when the vector column is missing', async function () { const dir = await track().mkdir('lancejs') const con = await lancedb.connect(dir) From 25d1c62c3faed620d1dbb0c0b08367770f998515 Mon Sep 17 00:00:00 2001 From: QianZhu Date: Wed, 3 Jan 2024 16:24:21 -0800 Subject: [PATCH 19/43] SaaS JS API sdk doc (#740) Co-authored-by: Aidan <64613310+aidangomar@users.noreply.github.com> --- .github/workflows/docs_test.yml | 3 + docs/mkdocs.yml | 1 + 
docs/src/ann_indexes.md | 2 + .../javascript/classes/RemoteConnection.md | 226 +++++++++++ docs/src/javascript/classes/RemoteQuery.md | 76 ++++ docs/src/javascript/classes/RemoteTable.md | 355 ++++++++++++++++++ docs/src/javascript/saas-modules.md | 92 +++++ 7 files changed, 755 insertions(+) create mode 100644 docs/src/javascript/classes/RemoteConnection.md create mode 100644 docs/src/javascript/classes/RemoteQuery.md create mode 100644 docs/src/javascript/classes/RemoteTable.md create mode 100644 docs/src/javascript/saas-modules.md diff --git a/.github/workflows/docs_test.yml b/.github/workflows/docs_test.yml index 084d563b..661e954f 100644 --- a/.github/workflows/docs_test.yml +++ b/.github/workflows/docs_test.yml @@ -88,6 +88,9 @@ jobs: cd docs/test node md_testing.js - name: Test + env: + LANCEDB_URI: ${{ secrets.LANCEDB_URI }} + LANCEDB_DEV_API_KEY: ${{ secrets.LANCEDB_DEV_API_KEY }} run: | cd docs/test/node for d in *; do cd "$d"; echo "$d".js; node "$d".js; cd ..; done diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index d25fc36c..e065e1ed 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -147,6 +147,7 @@ nav: - OSS Python API: python/python.md - SaaS Python API: python/saas-python.md - Javascript API: javascript/modules.md + - SaaS Javascript API: javascript/saas-modules.md - LanceDB Cloud↗: https://noteforms.com/forms/lancedb-mailing-list-cloud-kty1o5?notionforms=1&utm_source=notionforms extra_css: diff --git a/docs/src/ann_indexes.md b/docs/src/ann_indexes.md index 6e9b7ac3..13b59d40 100644 --- a/docs/src/ann_indexes.md +++ b/docs/src/ann_indexes.md @@ -164,6 +164,7 @@ You can further filter the elements returned by a search using a where clause. const results_2 = await table .search(Array(1536).fill(1.2)) .where("id != '1141'") + .limit(2) .execute() ``` @@ -187,6 +188,7 @@ You can select the columns returned by the query using a select clause. const results_3 = await table .search(Array(1536).fill(1.2)) .select(["id"]) + .limit(2) .execute() ``` diff --git a/docs/src/javascript/classes/RemoteConnection.md b/docs/src/javascript/classes/RemoteConnection.md new file mode 100644 index 00000000..a3b627d7 --- /dev/null +++ b/docs/src/javascript/classes/RemoteConnection.md @@ -0,0 +1,226 @@ +[vectordb](../README.md) / [Exports](../saas-modules.md) / RemoteConnection + +# Class: RemoteConnection + +A connection to a remote LanceDB database. The class RemoteConnection implements interface Connection + +## Implements + +- [`Connection`](../interfaces/Connection.md) + +## Table of contents + +### Constructors + +- [constructor](RemoteConnection.md#constructor) + +### Methods + +- [createTable](RemoteConnection.md#createtable) +- [tableNames](RemoteConnection.md#tablenames) +- [openTable](RemoteConnection.md#opentable) +- [dropTable](RemoteConnection.md#droptable) + + +## Constructors + +### constructor + +• **new RemoteConnection**(`client`, `dbName`) + +#### Parameters + +| Name | Type | +| :------ | :------ | +| `client` | `HttpLancedbClient` | +| `dbName` | `string` | + +#### Defined in + +[remote/index.ts:37](https://github.com/lancedb/lancedb/blob/main/node/src/remote/index.ts#L37) + +## Methods + +### createTable + +▸ **createTable**(`name`, `data`, `mode?`): `Promise`<[`Table`](../interfaces/Table.md)<`number`[]\>\> + +Creates a new Table and initialize it with new data. + +#### Parameters + +| Name | Type | Description | +| :------ | :------ | :------ | +| `name` | `string` | The name of the table. 
| +| `data` | `Record`<`string`, `unknown`\>[] | Non-empty Array of Records to be inserted into the Table | +| `mode?` | [`WriteMode`](../enums/WriteMode.md) | The write mode to use when creating the table. | + +#### Returns + +`Promise`<[`Table`](../interfaces/Table.md)<`number`[]\>\> + +#### Implementation of + +[Connection](../interfaces/Connection.md).[createTable](../interfaces/Connection.md#createtable) + +#### Defined in + +[remote/index.ts:75](https://github.com/lancedb/lancedb/blob/main/node/src/remote/index.ts#L75) + +▸ **createTable**(`name`, `data`, `mode`): `Promise`<[`Table`](../interfaces/Table.md)<`number`[]\>\> + +#### Parameters + +| Name | Type | +| :------ | :------ | +| `name` | `string` | +| `data` | `Record`<`string`, `unknown`\>[] | +| `mode` | [`WriteMode`](../enums/WriteMode.md) | +| `embeddings` | [`EmbeddingFunction`](../interfaces/EmbeddingFunction.md)<`T`\> | An embedding function to use on this Table | + +#### Returns + +`Promise`<[`Table`](../interfaces/Table.md)<`number`[]\>\> + +#### Implementation of + +Connection.createTable + +#### Defined in + +[remote/index.ts:231](https://github.com/lancedb/lancedb/blob/b1eeb90/node/src/index.ts#L231) + +___ + +### dropTable + +▸ **dropTable**(`name`): `Promise`<`void`\> + +Drop an existing table. + +#### Parameters + +| Name | Type | Description | +| :------ | :------ | :------ | +| `name` | `string` | The name of the table to drop. | + +#### Returns + +`Promise`<`void`\> + +#### Implementation of + +[Connection](../interfaces/Connection.md).[dropTable](../interfaces/Connection.md#droptable) + +#### Defined in + +[remote/index.ts:131](https://github.com/lancedb/lancedb/blob/main/node/src/remote/index.ts#L131) + +___ + +### openTable + +▸ **openTable**(`name`): `Promise`<[`Table`](../interfaces/Table.md)<`number`[]\>\> + +Open a table in the database. + +#### Parameters + +| Name | Type | Description | +| :------ | :------ | :------ | +| `name` | `string` | The name of the table. | + +#### Returns + +`Promise`<[`Table`](../interfaces/Table.md)<`number`[]\>\> + +#### Implementation of + +[Connection](../interfaces/Connection.md).[openTable](../interfaces/Connection.md#opentable) + +#### Defined in + +[remote/index.ts:65](https://github.com/lancedb/lancedb/blob/main/node/src/remote/index.ts#L65) + +▸ **openTable**<`T`\>(`name`, `embeddings`): `Promise`<[`Table`](../interfaces/Table.md)<`T`\>\> + +Open a table in the database. + +#### Type parameters + +| Name | +| :------ | +| `T` | + +#### Parameters + +| Name | Type | Description | +| :------ | :------ | :------ | +| `name` | `string` | The name of the table. 
| +| `embeddings` | [`EmbeddingFunction`](../interfaces/EmbeddingFunction.md)<`T`\> | An embedding function to use on this Table | + +#### Returns + +`Promise`<[`Table`](../interfaces/Table.md)<`T`\>\> + +#### Implementation of + +Connection.openTable + +#### Defined in + +[remote/index.ts:66](https://github.com/lancedb/lancedb/blob/main/node/src/remote/index.ts#L66) + +▸ **openTable**<`T`\>(`name`, `embeddings?`): `Promise`<[`Table`](../interfaces/Table.md)<`T`\>\> + +#### Type parameters + +| Name | +| :------ | +| `T` | + +#### Parameters + +| Name | Type | +| :------ | :------ | +| `name` | `string` | +| `embeddings?` | [`EmbeddingFunction`](../interfaces/EmbeddingFunction.md)<`T`\> | + +#### Returns + +`Promise`<[`Table`](../interfaces/Table.md)<`T`\>\> + +#### Implementation of + +Connection.openTable + +#### Defined in + +[remote/index.ts:67](https://github.com/lancedb/lancedb/blob/main/node/src/remote/index.ts#L67) + +___ + +### tableNames + +▸ **tableNames**(): `Promise`<`string`[]\> + +Get the names of all tables in the database, with pagination. + +#### Parameters + +| Name | Type | +| :------ | :------ | +| `pageToken` | `string` | +| `limit` | `int` | + +#### Returns + +`Promise`<`string`[]\> + +#### Implementation of + +[Connection](../interfaces/Connection.md).[tableNames](../interfaces/Connection.md#tablenames) + +#### Defined in + +[remote/index.ts:60](https://github.com/lancedb/lancedb/blob/main/node/src/remote/index.ts#L60) diff --git a/docs/src/javascript/classes/RemoteQuery.md b/docs/src/javascript/classes/RemoteQuery.md new file mode 100644 index 00000000..4ff7d366 --- /dev/null +++ b/docs/src/javascript/classes/RemoteQuery.md @@ -0,0 +1,76 @@ +[vectordb](../README.md) / [Exports](../saas-modules.md) / RemoteQuery + +# Class: Query + +A builder for nearest neighbor queries for LanceDB. 
+ +## Type parameters + +| Name | Type | +| :------ | :------ | +| `T` | `number`[] | + +## Table of contents + +### Constructors + +- [constructor](RemoteQuery.md#constructor) + +### Properties + +- [\_embeddings](RemoteQuery.md#_embeddings) +- [\_query](RemoteQuery.md#_query) +- [\_name](RemoteQuery.md#_name) +- [\_client](RemoteQuery.md#_client) + +### Methods + +- [execute](RemoteQuery.md#execute) + + +## Constructors + +### constructor + +• **new Query**<`T`\>(`name`, `client`, `query`, `embeddings?`) + +#### Type parameters + +| Name | Type | +| :------ | :------ | +| `T` | `number`[] | + +#### Parameters + +| Name | Type | +| :------ | :------ | +| `name` | `string` | +| `client` | `HttpLancedbClient` | +| `query` | `T` | +| `embeddings?` | [`EmbeddingFunction`](../interfaces/EmbeddingFunction.md)<`T`\> | + +#### Defined in + +[remote/index.ts:137](https://github.com/lancedb/lancedb/blob/main/node/src/remote/index.ts#L137) + +## Methods + +### execute + +▸ **execute**<`T`\>(): `Promise`<`T`[]\> + +Execute the query and return the results as an Array of Objects + +#### Type parameters + +| Name | Type | +| :------ | :------ | +| `T` | `Record`<`string`, `unknown`\> | + +#### Returns + +`Promise`<`T`[]\> + +#### Defined in + +[remote/index.ts:143](https://github.com/lancedb/lancedb/blob/main/node/src/remote/index.ts#L143) \ No newline at end of file diff --git a/docs/src/javascript/classes/RemoteTable.md b/docs/src/javascript/classes/RemoteTable.md new file mode 100644 index 00000000..5c176207 --- /dev/null +++ b/docs/src/javascript/classes/RemoteTable.md @@ -0,0 +1,355 @@ +[vectordb](../README.md) / [Exports](../saas-modules.md) / RemoteTable + +# Class: RemoteTable + +A LanceDB Table is the collection of Records. Each Record has one or more vector fields. 
+ +## Type parameters + +| Name | Type | +| :------ | :------ | +| `T` | `number`[] | + +## Implements + +- [`Table`](../interfaces/Table.md)<`T`\> + +## Table of contents + +### Constructors + +- [constructor](RemoteTable.md#constructor) + +### Properties + +- [\_name](RemoteTable.md#_name) +- [\_client](RemoteTable.md#_client) +- [\_embeddings](RemoteTable.md#_embeddings) + +### Accessors + +- [name](RemoteTable.md#name) + +### Methods + +- [add](RemoteTable.md#add) +- [countRows](RemoteTable.md#countrows) +- [createIndex](RemoteTable.md#createindex) +- [delete](RemoteTable.md#delete) +- [listIndices](classes/RemoteTable.md#listindices) +- [indexStats](classes/RemoteTable.md#liststats) +- [overwrite](RemoteTable.md#overwrite) +- [search](RemoteTable.md#search) +- [schema](classes/RemoteTable.md#schema) +- [update](RemoteTable.md#update) + +## Constructors + +### constructor + +• **new RemoteTable**<`T`\>(`client`, `name`) + +#### Type parameters + +| Name | Type | +| :------ | :------ | +| `T` | `number`[] | + +#### Parameters + +| Name | Type | +| :------ | :------ | +| `client` | `HttpLancedbClient` | +| `name` | `string` | + +#### Defined in + +[remote/index.ts:186](https://github.com/lancedb/lancedb/blob/main/node/src/remote/index.ts#L186) + +• **new RemoteTable**<`T`\>(`client`, `name`, `embeddings`) + +#### Type parameters + +| Name | Type | +| :------ | :------ | +| `T` | `number`[] | + +#### Parameters + +| Name | Type | Description | +| :------ | :------ | :------ | +| `client` | `HttpLancedbClient` | | +| `name` | `string` | | +| `embeddings` | [`EmbeddingFunction`](../interfaces/EmbeddingFunction.md)<`T`\> | An embedding function to use when interacting with this table | + +#### Defined in + +[remote/index.ts:187](https://github.com/lancedb/lancedb/blob/main/node/src/remote/index.ts#L187) + +## Accessors + +### name + +• `get` **name**(): `string` + +#### Returns + +`string` + +#### Implementation of + +[Table](../interfaces/Table.md).[name](../interfaces/Table.md#name) + +#### Defined in + +[remote/index.ts:194](https://github.com/lancedb/lancedb/blob/main/node/src/remote/index.ts#L194) + +## Methods + +### add + +▸ **add**(`data`): `Promise`<`number`\> + +Insert records into this Table. + +#### Parameters + +| Name | Type | Description | +| :------ | :------ | :------ | +| `data` | `Record`<`string`, `unknown`\>[] | Records to be inserted into the Table | + +#### Returns + +`Promise`<`number`\> + +The number of rows added to the table + +#### Implementation of + +[Table](../interfaces/Table.md).[add](../interfaces/Table.md#add) + +#### Defined in + +[remote/index.ts:293](https://github.com/lancedb/lancedb/blob/main/node/src/remote/index.ts#L293) + +___ + +### countRows + +▸ **countRows**(): `Promise`<`number`\> + +Returns the number of rows in this table. + +#### Returns + +`Promise`<`number`\> + +#### Implementation of + +[Table](../interfaces/Table.md).[countRows](../interfaces/Table.md#countrows) + +#### Defined in + +[remote/index.ts:290](https://github.com/lancedb/lancedb/blob/main/node/src/remote/index.ts#L290) + +___ + +### createIndex + +▸ **createIndex**(`metric_type`, `column`, `index_cache_size`): `Promise`<`any`\> + +Create an ANN index on this Table vector index. 
+ +#### Parameters + +| Name | Type | Description | +| :------ | :------ | :------ | +| `metric_type` | `string` | distance metric type, L2 or cosine or dot | +| `column` | `string` | the name of the column to be indexed | + +#### Returns + +`Promise`<`any`\> + +#### Implementation of + +[Table](../interfaces/Table.md).[createIndex](../interfaces/Table.md#createindex) + +#### Defined in + +[remote/index.ts:249](https://github.com/lancedb/lancedb/blob/main/node/src/remote/index.ts#L249) + +___ + +### delete + +▸ **delete**(`filter`): `Promise`<`void`\> + +Delete rows from this table. + +#### Parameters + +| Name | Type | Description | +| :------ | :------ | :------ | +| `filter` | `string` | A filter in the same format used by a sql WHERE clause. | + +#### Returns + +`Promise`<`void`\> + +#### Implementation of + +[Table](../interfaces/Table.md).[delete](../interfaces/Table.md#delete) + +#### Defined in + +[remote/index.ts:295](https://github.com/lancedb/lancedb/blob/main/node/src/remote/index.ts#L295) + +___ + +### overwrite + +▸ **overwrite**(`data`): `Promise`<`number`\> + +Insert records into this Table, replacing its contents. + +#### Parameters + +| Name | Type | Description | +| :------ | :------ | :------ | +| `data` | `Record`<`string`, `unknown`\>[] | Records to be inserted into the Table | + +#### Returns + +`Promise`<`number`\> + +The number of rows added to the table + +#### Implementation of + +[Table](../interfaces/Table.md).[overwrite](../interfaces/Table.md#overwrite) + +#### Defined in + +[remote/index.ts:231](https://github.com/lancedb/lancedb/blob/main/node/src/remote/index.ts#L231) + +___ + +### search + +▸ **search**(`query`): [`Query`](Query.md)<`T`\> + +Creates a search query to find the nearest neighbors of the given search term + +#### Parameters + +| Name | Type | Description | +| :------ | :------ | :------ | +| `query` | `T` | The query search term | + +#### Returns + +[`Query`](Query.md)<`T`\> + +#### Implementation of + +[Table](../interfaces/Table.md).[search](../interfaces/Table.md#search) + +#### Defined in + +[remote/index.ts:209](https://github.com/lancedb/lancedb/blob/main/node/src/remote/index.ts#L209) + +___ + +### update + +▸ **update**(`args`): `Promise`<`void`\> + +Update zero to all rows depending on how many rows match the where clause. 
+ +#### Parameters + +| Name | Type | Description | +| :------ | :------ | :------ | +| `args` | `UpdateArgs` or `UpdateSqlArgs` | The query search arguments | + +#### Returns + +`Promise`<`any`\> + +#### Implementation of + +[Table](../interfaces/Table.md).[search](../interfaces/Table.md#update) + +#### Defined in + +[remote/index.ts:299](https://github.com/lancedb/lancedb/blob/main/node/src/remote/index.ts#L299) + +___ + +### schema + +▸ **schema**(): `Promise`<`void`\> + +Get the schema of the table + + +#### Returns + +`Promise`<`any`\> + +#### Implementation of + +[Table](../interfaces/Table.md).[search](../interfaces/Table.md#schema) + +#### Defined in + +[remote/index.ts:198](https://github.com/lancedb/lancedb/blob/main/node/src/remote/index.ts#L198) + +___ + +### listIndices + +▸ **listIndices**(): `Promise`<`void`\> + +List the indices of the table + + +#### Returns + +`Promise`<`any`\> + +#### Implementation of + +[Table](../interfaces/Table.md).[search](../interfaces/Table.md#listIndices) + +#### Defined in + +[remote/index.ts:319](https://github.com/lancedb/lancedb/blob/main/node/src/remote/index.ts#L319) + +___ + +### indexStats + +▸ **indexStats**(`indexUuid`): `Promise`<`void`\> + +Get the indexed/unindexed of rows from the table + +#### Parameters + +| Name | Type | Description | +| :------ | :------ | :------ | +| `indexUuid` | `string` | the uuid of the index | + +#### Returns + +`Promise`<`numIndexedRows`\> +`Promise`<`numUnindexedRows`\> + +#### Implementation of + +[Table](../interfaces/Table.md).[search](../interfaces/Table.md#indexStats) + +#### Defined in + +[remote/index.ts:328](https://github.com/lancedb/lancedb/blob/main/node/src/remote/index.ts#L328) \ No newline at end of file diff --git a/docs/src/javascript/saas-modules.md b/docs/src/javascript/saas-modules.md new file mode 100644 index 00000000..22253972 --- /dev/null +++ b/docs/src/javascript/saas-modules.md @@ -0,0 +1,92 @@ +# Table of contents + +## Installation + +```bash +npm install vectordb +``` + +This will download the appropriate native library for your platform. We currently +support x86_64 Linux, aarch64 Linux, Intel MacOS, and ARM (M1/M2) MacOS. We do not +yet support Windows or musl-based Linux (such as Alpine Linux). + + +## Classes +- [RemoteConnection](classes/RemoteConnection.md) +- [RemoteTable](classes/RemoteTable.md) +- [RemoteQuery](classes/RemoteQuery.md) + + +## Methods + +- [add](classes/RemoteTable.md#add) +- [countRows](classes/RemoteTable.md#countrows) +- [createIndex](classes/RemoteTable.md#createindex) +- [createTable](classes/RemoteConnection.md#createtable) +- [delete](classes/RemoteTable.md#delete) +- [dropTable](classes/RemoteConnection.md#droptable) +- [listIndices](classes/RemoteTable.md#listindices) +- [indexStats](classes/RemoteTable.md#liststats) +- [openTable](classes/RemoteConnection.md#opentable) +- [overwrite](classes/RemoteTable.md#overwrite) +- [schema](classes/RemoteTable.md#schema) +- [search](classes/RemoteTable.md#search) +- [tableNames](classes/RemoteConnection.md#tablenames) +- [update](classes/RemoteTable.md#update) + + +## Example code +```javascript + +const lancedb = require('vectordb'); +const { Schema, Field, Int32, Float32, Utf8, FixedSizeList } = require ("apache-arrow/Arrow.node") + +// connect to a remote DB +const devApiKey = process.env.LANCEDB_DEV_API_KEY +const dbURI = process.env.LANCEDB_URI +const db = await lancedb.connect({ + uri: dbURI, // replace dbURI with your project, e.g. 
"db://your-project-name" + apiKey: devApiKey, // replace dbURI with your api key + region: "us-east-1-dev" +}); +// create a new table +const tableName = "my_table_000" +const data = [ + { id: 1, vector: [0.1, 1.0], item: "foo", price: 10.0 }, + { id: 2, vector: [3.9, 0.5], item: "bar", price: 20.0 } +] +const schema = new Schema( + [ + new Field('id', new Int32()), + new Field('vector', new FixedSizeList(2, new Field('float32', new Float32()))), + new Field('item', new Utf8()), + new Field('price', new Float32()) + ] +) +const table = await db.createTable({ + name: tableName, + schema, +}, data) + +// list the table +const tableNames_1 = await db.tableNames('') +// add some data and search should be okay +const newData = [ + { id: 3, vector: [10.3, 1.9], item: "test1", price: 30.0 }, + { id: 4, vector: [6.2, 9.2], item: "test2", price: 40.0 } +] +table.add(newData) +// create the index for the table +await table.createIndex({ + metric_type: "L2", + column: "vector" +}) +let result = await table.search([2.8, 4.3]).select(["vector", "price"]).limit(1).execute() +// update the data +await table.update({ + where: "id == 1", + values: { item: "foo1" } +}) +//drop the table +await db.dropTable(tableName) +``` \ No newline at end of file From e3ba5b2402e36f1adc7a308d35ca641dd649afd4 Mon Sep 17 00:00:00 2001 From: Bengsoon Chuah <43534911+bengsoon@users.noreply.github.com> Date: Fri, 5 Jan 2024 03:15:42 +0800 Subject: [PATCH 20/43] Add relevant imports for each step (#764) I found that it was quite incoherent to have to read through the documentation and having to search which submodule that each class should be imported from. For example, it is cumbersome to have to navigate to another documentation page to find out that `EmbeddingFunctionRegistry` is from `lancedb.embeddings` --- docs/src/embeddings/embedding_functions.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/src/embeddings/embedding_functions.md b/docs/src/embeddings/embedding_functions.md index 9390bf8a..e7561fcd 100644 --- a/docs/src/embeddings/embedding_functions.md +++ b/docs/src/embeddings/embedding_functions.md @@ -8,6 +8,8 @@ You can simply follow these steps and forget about the details of your embedding ### Step 1 - Define the embedding function We have some pre-defined embedding functions in the global registry with more coming soon. Here's let's an implementation of CLIP as example. ``` +from lancedb.embeddings import EmbeddingFunctionRegistry + registry = EmbeddingFunctionRegistry.get_instance() clip = registry.get("open-clip").create() @@ -18,6 +20,8 @@ You can also define your own embedding function by implementing the `EmbeddingFu Our embedding function from the previous section abstracts away all the details about the models and dimensions required to define the schema. You can simply set a feild as **source** or **vector** column. 
Here's how ```python +from lancedb.pydantic import LanceModel, Vector + class Pets(LanceModel): vector: Vector(clip.ndims) = clip.VectorField() image_uri: str = clip.SourceField() @@ -30,6 +34,8 @@ class Pets(LanceModel): Now that we have chosen/defined our embedding function and the schema, we can create the table ```python +import lancedb + db = lancedb.connect("~/lancedb") table = db.create_table("pets", schema=Pets) @@ -52,6 +58,8 @@ result = table.search("dog") Let's query an image ```python +from pathlib import Path + p = Path("path/to/images/samoyed_100.jpg") query_image = Image.open(p) table.search(query_image) @@ -75,6 +83,8 @@ Embedding functions can also fail due to other errors that have nothing to do wi LanceDB is integrated with PyDantic. Infact we've used the integration in the above example to define the schema. It is also being used behing the scene by the embdding function API to ingest useful information as table metadata. You can also use it for adding utility operations in the schema. For example, in our multi-modal example, you can search images using text or another image. Let us define a utility function to plot the image. ```python +from lancedb.pydantic import LanceModel, Vector + class Pets(LanceModel): vector: Vector(clip.ndims) = clip.VectorField() image_uri: str = clip.SourceField() From e9294911878e0a2bdb8422bbd6aeb15d56a21b3c Mon Sep 17 00:00:00 2001 From: Chang She <759245+changhiskhan@users.noreply.github.com> Date: Thu, 4 Jan 2024 11:45:12 -0800 Subject: [PATCH 21/43] chore(python): handle NaN input in fts ingestion (#763) If the input text is None, Tantivy raises an error complaining it cannot add a NoneType. We handle this upstream so None's are not added to the document. If all of the indexed fields are None then we skip this document. 
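Concretely, the guarded ingestion loop described above looks roughly like this (a simplified sketch of the change in the diff that follows; `writer`, `b`, `fields`, and `row_id` are the locals of `populate_index`, and only the body of the per-row loop is shown):

```python
import tantivy  # tantivy-py, as used by lancedb.fts

def add_row(writer, b, fields, i, row_id):
    """Add row `i` of a flattened RecordBatch `b` to the tantivy writer."""
    doc = tantivy.Document()
    for name in fields:
        value = b[name][i].as_py()
        if value is not None:          # None fields are simply skipped
            doc.add_text(name, value)
    if not doc.is_empty:               # every indexed field was None -> skip the row
        doc.add_integer("doc_id", row_id)
        writer.add_document(doc)
```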
--- python/lancedb/fts.py | 9 ++++++--- python/tests/test_fts.py | 15 +++++++++++++++ 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/python/lancedb/fts.py b/python/lancedb/fts.py index f187be8d..eb5c37e3 100644 --- a/python/lancedb/fts.py +++ b/python/lancedb/fts.py @@ -103,10 +103,13 @@ def populate_index(index: tantivy.Index, table: LanceTable, fields: List[str]) - b = b.flatten() for i in range(b.num_rows): doc = tantivy.Document() - doc.add_integer("doc_id", row_id) for name in fields: - doc.add_text(name, b[name][i].as_py()) - writer.add_document(doc) + value = b[name][i].as_py() + if value is not None: + doc.add_text(name, value) + if not doc.is_empty: + doc.add_integer("doc_id", row_id) + writer.add_document(doc) row_id += 1 # commit changes writer.commit() diff --git a/python/tests/test_fts.py b/python/tests/test_fts.py index f09b44ef..baa07096 100644 --- a/python/tests/test_fts.py +++ b/python/tests/test_fts.py @@ -147,3 +147,18 @@ def test_search_index_with_filter(table): assert r["id"] == 1 assert rs == rs2 + + +def test_null_input(table): + table.add( + [ + { + "vector": np.random.randn(128), + "id": 101, + "text": None, + "text2": None, + "nested": {"text": None}, + } + ] + ) + table.create_fts_index("text") From a25d10279cf9974ba8b9e10ac715ef64c5ddfaa2 Mon Sep 17 00:00:00 2001 From: QianZhu Date: Thu, 4 Jan 2024 14:30:34 -0800 Subject: [PATCH 22/43] small bug fix for example code in SaaS JS doc (#770) --- docs/src/javascript/saas-modules.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/javascript/saas-modules.md b/docs/src/javascript/saas-modules.md index 22253972..f9bdd83d 100644 --- a/docs/src/javascript/saas-modules.md +++ b/docs/src/javascript/saas-modules.md @@ -75,7 +75,7 @@ const newData = [ { id: 3, vector: [10.3, 1.9], item: "test1", price: 30.0 }, { id: 4, vector: [6.2, 9.2], item: "test2", price: 40.0 } ] -table.add(newData) +await table.add(newData) // create the index for the table await table.createIndex({ metric_type: "L2", From 2fd829296e647e67da63fb4f609710be21c6ed3a Mon Sep 17 00:00:00 2001 From: Vladimir Varankin Date: Sun, 7 Jan 2024 23:26:35 +0100 Subject: [PATCH 23/43] Minor corrections for docs of embedding_functions (#780) In addition to #777, this pull request fixes more typos in the documentation for "Ingest Embedding Functions". --- docs/src/embeddings/embedding_functions.md | 24 +++++++++++----------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/docs/src/embeddings/embedding_functions.md b/docs/src/embeddings/embedding_functions.md index e7561fcd..fd70cfc0 100644 --- a/docs/src/embeddings/embedding_functions.md +++ b/docs/src/embeddings/embedding_functions.md @@ -38,9 +38,9 @@ import lancedb db = lancedb.connect("~/lancedb") table = db.create_table("pets", schema=Pets) - ``` -That's it! We have ingested all the information needed to embed source and query inputs. We can now forget about the model and dimension details and start to build or VectorDB + +That's it! We have ingested all the information needed to embed source and query inputs. We can now forget about the model and dimension details and start to build our VectorDB. ### Step 4 - Ingest lots of data and run vector search! 
Now you can just add the data and it'll be vectorized automatically @@ -63,25 +63,24 @@ from pathlib import Path p = Path("path/to/images/samoyed_100.jpg") query_image = Image.open(p) table.search(query_image) - ``` + ### Rate limit Handling -`EmbeddingFunction` class wraps the calls for source and query embedding generation inside a rate limit handler that retries the requests with exponential backoff after successive failures. By default the maximum retires is set to 7. You can tune it by setting it to a different number or disable it by setting it to 0. -Example ----- +`EmbeddingFunction` class wraps the calls for source and query embedding generation inside a rate limit handler that retries the requests with exponential backoff after successive failures. By default the maximum retires is set to 7. You can tune it by setting it to a different number or disable it by setting it to 0. Example: ```python clip = registry.get("open-clip").create() # Defaults to 7 max retries clip = registry.get("open-clip").create(max_retries=10) # Increase max retries to 10 clip = registry.get("open-clip").create(max_retries=0) # Retries disabled -```` +``` NOTE: -Embedding functions can also fail due to other errors that have nothing to do with rate limits. This is why the error is also logged. +Embedding functions can also fail due to other errors that have nothing to do with rate limits. This is why the errors are also logged. ### A little fun with PyDantic -LanceDB is integrated with PyDantic. Infact we've used the integration in the above example to define the schema. It is also being used behing the scene by the embdding function API to ingest useful information as table metadata. -You can also use it for adding utility operations in the schema. For example, in our multi-modal example, you can search images using text or another image. Let us define a utility function to plot the image. +LanceDB is integrated with PyDantic. In fact, we've used the integration in the above example to define the schema. It is also being used behind the scene by the embedding function API to ingest useful information as table metadata. +You can also use it for adding utility operations in the schema. For example, in our multi-modal example, you can search images using text or another image. Let's define a utility function to plot the image. + ```python from lancedb.pydantic import LanceModel, Vector @@ -93,7 +92,8 @@ class Pets(LanceModel): def image(self): return Image.open(self.image_uri) ``` -Now, you can covert your search results to pydantic model and use this property. + +Now, you can covert your search results to PyDantic model and use its property. ```python rs = table.search(query_image).limit(3).to_pydantic(Pets) @@ -102,4 +102,4 @@ rs[2].image ![](../assets/dog_clip_output.png) -Now that you've the basic idea about LanceDB embedding function, let us now dive deeper into the API that you can use to implement your own embedding functions! +Now that you have the basic idea about LanceDB embedding function, let us dive deeper into the API that you can use to implement your own embedding functions! 
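For reference, the walkthrough corrected by this patch condenses to the following end-to-end flow (a sketch assembled only from the snippets above; the database path, table name, and image path are illustrative):

```python
import lancedb
import pandas as pd
from lancedb.embeddings import EmbeddingFunctionRegistry
from lancedb.pydantic import LanceModel, Vector

registry = EmbeddingFunctionRegistry.get_instance()
clip = registry.get("open-clip").create()            # embedding function from the registry

class Pets(LanceModel):
    vector: Vector(clip.ndims) = clip.VectorField()  # embeddings are written here
    image_uri: str = clip.SourceField()              # raw inputs are read from here

db = lancedb.connect("~/lancedb")                    # illustrative local path
table = db.create_table("pets", schema=Pets)

uris = ["path/to/images/samoyed_100.jpg"]            # illustrative image paths
table.add(pd.DataFrame({"image_uri": uris}))         # vectorized automatically on ingest

rs = table.search("dog").limit(3).to_pydantic(Pets)  # results come back as Pets models
```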
From 6698376f02d10bafc745fdca943d53b9543d5a51 Mon Sep 17 00:00:00 2001 From: Chris Date: Sun, 7 Jan 2024 14:27:40 -0800 Subject: [PATCH 24/43] Minor Fixes to Ingest Embedding Functions Docs (#777) Addressed minor typos and grammatical issues to improve readability --------- Co-authored-by: Christopher Correa --- docs/src/embeddings/embedding_functions.md | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/docs/src/embeddings/embedding_functions.md b/docs/src/embeddings/embedding_functions.md index fd70cfc0..630c0e3f 100644 --- a/docs/src/embeddings/embedding_functions.md +++ b/docs/src/embeddings/embedding_functions.md @@ -1,7 +1,6 @@ -Representing multi-modal data as vector embeddings is becoming a standard practice. Embedding functions themselves be thought of as a part of the processing pipeline that each request(input) has to be passed through. After initial setup these components are not expected to change for a particular project. - -This is main motivation behind our new embedding functions API, that allow you simply set it up once and the table remembers it, effectively making the **embedding functions disappear in the background** so you don't have to worry about modelling and simply focus on the DB aspects of VectorDB. +Representing multi-modal data as vector embeddings is becoming a standard practice. Embedding functions themselves can be thought of as a part of the processing pipeline that each request(input) has to be passed through. After initial setup these components are not expected to change for a particular project. +Our new embedding functions API allow you simply set it up once and the table remembers it, effectively making the **embedding functions disappear in the background** so you don't have to worry about modelling and can simply focus on the DB aspects of VectorDB. You can simply follow these steps and forget about the details of your embedding functions as long as you don't intend to change it. @@ -17,7 +16,7 @@ clip = registry.get("open-clip").create() You can also define your own embedding function by implementing the `EmbeddingFunction` abstract base interface. It subclasses PyDantic Model which can be utilized to write complex schemas simply as we'll see next! ### Step 2 - Define the Data Model or Schema -Our embedding function from the previous section abstracts away all the details about the models and dimensions required to define the schema. You can simply set a feild as **source** or **vector** column. Here's how +Our embedding function from the previous section abstracts away all the details about the models and dimensions required to define the schema. You can simply set a field as **source** or **vector** column. 
Here's how ```python from lancedb.pydantic import LanceModel, Vector From 8a48b3268905d97f0307f5b250d1df17f8e7061b Mon Sep 17 00:00:00 2001 From: sudhir Date: Mon, 8 Jan 2024 06:27:56 +0800 Subject: [PATCH 25/43] Make examples work with current version of Openai api's (#779) These examples don't work because of changes in openai api from version 1+ --- docs/src/notebooks/code_qa_bot.ipynb | 9 +++---- .../notebooks/youtube_transcript_search.ipynb | 26 +++++++++---------- 2 files changed, 17 insertions(+), 18 deletions(-) diff --git a/docs/src/notebooks/code_qa_bot.ipynb b/docs/src/notebooks/code_qa_bot.ipynb index de787aca..5ff18a0c 100644 --- a/docs/src/notebooks/code_qa_bot.ipynb +++ b/docs/src/notebooks/code_qa_bot.ipynb @@ -44,15 +44,14 @@ "metadata": {}, "outputs": [], "source": [ - "import openai\n", + "from openai import OpenAI\n", "import os\n", "\n", "# Configuring the environment variable OPENAI_API_KEY\n", "if \"OPENAI_API_KEY\" not in os.environ:\n", - " # OR set the key here as a variable\n", - " openai.api_key = \"sk-...\"\n", - " \n", - "assert len(openai.Model.list()[\"data\"]) > 0" + " os.environ[\"OPENAI_API_KEY\"] = \"sk-...\"\n", + "client = OpenAI()\n", + "assert len(client.models.list().data) > 0" ] }, { diff --git a/docs/src/notebooks/youtube_transcript_search.ipynb b/docs/src/notebooks/youtube_transcript_search.ipynb index b39a9c8e..e3cac08a 100644 --- a/docs/src/notebooks/youtube_transcript_search.ipynb +++ b/docs/src/notebooks/youtube_transcript_search.ipynb @@ -27,11 +27,11 @@ "output_type": "stream", "text": [ "\n", - "\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m A new release of pip is available: \u001B[0m\u001B[31;49m23.0\u001B[0m\u001B[39;49m -> \u001B[0m\u001B[32;49m23.1.1\u001B[0m\n", - "\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m To update, run: \u001B[0m\u001B[32;49mpip install --upgrade pip\u001B[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.1.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", "\n", - "\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m A new release of pip is available: \u001B[0m\u001B[31;49m23.0\u001B[0m\u001B[39;49m -> \u001B[0m\u001B[32;49m23.1.1\u001B[0m\n", - "\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m To update, run: \u001B[0m\u001B[32;49mpip install --upgrade pip\u001B[0m\n" + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.1.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" ] } ], @@ -206,15 +206,16 @@ "metadata": {}, "outputs": [], "source": [ - "import openai\n", + "from openai import OpenAI\n", "import os\n", "\n", "# Configuring the environment variable OPENAI_API_KEY\n", "if \"OPENAI_API_KEY\" not in os.environ:\n", " # OR set the key here as a variable\n", - " openai.api_key = \"sk-...\"\n", + " os.environ[\"OPENAI_API_KEY\"] = \"sk-...\"\n", " \n", - "assert len(openai.Model.list()[\"data\"]) > 0" + 
"client = OpenAI()\n", + "assert len(client.models.list().data) > 0" ] }, { @@ -234,8 +235,8 @@ "outputs": [], "source": [ "def embed_func(c): \n", - " rs = openai.Embedding.create(input=c, engine=\"text-embedding-ada-002\")\n", - " return [record[\"embedding\"] for record in rs[\"data\"]]" + " rs = client.embeddings.create(input=c, model=\"text-embedding-ada-002\")\n", + " return [rs.data[0].embedding]" ] }, { @@ -536,9 +537,8 @@ ], "source": [ "def complete(prompt):\n", - " # query text-davinci-003\n", - " res = openai.Completion.create(\n", - " engine='text-davinci-003',\n", + " res = client.completions.create(\n", + " model='text-davinci-003',\n", " prompt=prompt,\n", " temperature=0,\n", " max_tokens=400,\n", @@ -547,7 +547,7 @@ " presence_penalty=0,\n", " stop=None\n", " )\n", - " return res['choices'][0]['text'].strip()\n", + " return res.choices[0].text\n", "\n", "# check that it works\n", "query = \"who was the 12th person on the moon and when did they land?\"\n", From 328aa2247bab187abdf36b82ba08a8a25c4b092b Mon Sep 17 00:00:00 2001 From: lucasiscovici Date: Sun, 7 Jan 2024 23:34:04 +0100 Subject: [PATCH 26/43] raise exception if fts index does not exist (#776) raise exception if fts index does not exist --------- Co-authored-by: Chang She <759245+changhiskhan@users.noreply.github.com> --- python/lancedb/query.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/python/lancedb/query.py b/python/lancedb/query.py index 3bdc763b..dcc72c58 100644 --- a/python/lancedb/query.py +++ b/python/lancedb/query.py @@ -14,6 +14,7 @@ from __future__ import annotations from abc import ABC, abstractmethod +from pathlib import Path from typing import TYPE_CHECKING, List, Literal, Optional, Type, Union import deprecation @@ -480,6 +481,12 @@ class LanceFtsQueryBuilder(LanceQueryBuilder): # get the index path index_path = self._table._get_fts_index_path() + # check if the index exist + if not Path(index_path).exists(): + raise FileNotFoundError( + "Fts index does not exist." + f"Please first call table.create_fts_index(['']) to create the fts index." + ) # open the index index = tantivy.Index.open(index_path) # get the scores and doc ids From 3100f0d861e86aeffdd07b4da04c3360a2caf017 Mon Sep 17 00:00:00 2001 From: Chang She <759245+changhiskhan@users.noreply.github.com> Date: Sun, 7 Jan 2024 15:15:13 -0800 Subject: [PATCH 27/43] feat(python): Set heap size to get faster fts indexing performance (#762) By default tantivy-py uses 128MB heapsize. We change the default to 1GB and we allow the user to customize this locally this makes `test_fts.py` run 10x faster --- docs/src/fts.md | 12 ++++++++++++ python/lancedb/fts.py | 13 ++++++++++--- python/lancedb/table.py | 9 +++++++-- 3 files changed, 29 insertions(+), 5 deletions(-) diff --git a/docs/src/fts.md b/docs/src/fts.md index 78c20f6b..721a5cf1 100644 --- a/docs/src/fts.md +++ b/docs/src/fts.md @@ -75,6 +75,18 @@ applied on top of the full text search results. This can be invoked via the fami table.search("puppy").limit(10).where("meta='foo'").to_list() ``` +## Configurations + +By default, LanceDB configures a 1GB heap size limit for creating the index. You can +reduce this if running on a smaller node, or increase this for faster performance while +indexing a larger corpus. + +```python +# configure a 512MB heap size +heap = 1024 * 1024 * 512 +table.create_fts_index(["text1", "text2"], writer_heap_size=heap, replace=True) +``` + ## Current limitations 1. Currently we do not yet support incremental writes. 
diff --git a/python/lancedb/fts.py b/python/lancedb/fts.py index eb5c37e3..f9667fcc 100644 --- a/python/lancedb/fts.py +++ b/python/lancedb/fts.py @@ -13,7 +13,7 @@ """Full text search index using tantivy-py""" import os -from typing import List, Tuple +from typing import List, Optional, Tuple import pyarrow as pa @@ -56,7 +56,12 @@ def create_index(index_path: str, text_fields: List[str]) -> tantivy.Index: return index -def populate_index(index: tantivy.Index, table: LanceTable, fields: List[str]) -> int: +def populate_index( + index: tantivy.Index, + table: LanceTable, + fields: List[str], + writer_heap_size: int = 1024 * 1024 * 1024, +) -> int: """ Populate an index with data from a LanceTable @@ -68,6 +73,8 @@ def populate_index(index: tantivy.Index, table: LanceTable, fields: List[str]) - The table to index fields : List[str] List of fields to index + writer_heap_size : int + The writer heap size in bytes, defaults to 1GB Returns ------- @@ -87,7 +94,7 @@ def populate_index(index: tantivy.Index, table: LanceTable, fields: List[str]) - raise TypeError(f"Field {name} is not a string type") # create a tantivy writer - writer = index.writer() + writer = index.writer(heap_size=writer_heap_size) # write data into index dataset = table.to_lance() row_id = 0 diff --git a/python/lancedb/table.py b/python/lancedb/table.py index 49b0d5b3..7db8a3d1 100644 --- a/python/lancedb/table.py +++ b/python/lancedb/table.py @@ -707,7 +707,11 @@ class LanceTable(Table): self._dataset.create_scalar_index(column, index_type="BTREE", replace=replace) def create_fts_index( - self, field_names: Union[str, List[str]], *, replace: bool = False + self, + field_names: Union[str, List[str]], + *, + replace: bool = False, + writer_heap_size: Optional[int] = 1024 * 1024 * 1024, ): """Create a full-text search index on the table. @@ -722,6 +726,7 @@ class LanceTable(Table): If True, replace the existing index if it exists. Note that this is not yet an atomic operation; the index will be temporarily unavailable while the new index is being created. 
+ writer_heap_size: int, default 1GB """ from .fts import create_index, populate_index @@ -738,7 +743,7 @@ class LanceTable(Table): fs.delete_dir(path) index = create_index(self._get_fts_index_path(), field_names) - populate_index(index, self, field_names) + populate_index(index, self, field_names, writer_heap_size=writer_heap_size) def _get_fts_index_path(self): return join_uri(self._dataset_uri, "_indices", "tantivy") From 4c8690549a76131120bcd7e8efb0ee366dba2231 Mon Sep 17 00:00:00 2001 From: Lei Xu Date: Sun, 7 Jan 2024 19:27:47 -0800 Subject: [PATCH 28/43] chore: bump lance to 0.9.5 (#790) --- Cargo.toml | 8 ++++---- python/pyproject.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index f5c73a96..8cc810ce 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,10 +5,10 @@ exclude = ["python"] resolver = "2" [workspace.dependencies] -lance = { "version" = "=0.9.2", "features" = ["dynamodb"] } -lance-index = { "version" = "=0.9.2" } -lance-linalg = { "version" = "=0.9.2" } -lance-testing = { "version" = "=0.9.2" } +lance = { "version" = "=0.9.5", "features" = ["dynamodb"] } +lance-index = { "version" = "=0.9.5" } +lance-linalg = { "version" = "=0.9.5" } +lance-testing = { "version" = "=0.9.5" } # Note that this one does not include pyarrow arrow = { version = "49.0.0", optional = false } arrow-array = "49.0" diff --git a/python/pyproject.toml b/python/pyproject.toml index 6580d804..71e532d3 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -3,7 +3,7 @@ name = "lancedb" version = "0.4.3" dependencies = [ "deprecation", - "pylance==0.9.2", + "pylance==0.9.5", "ratelimiter~=1.0", "retry>=0.9.2", "tqdm>=4.27.0", From 175ad9223b09adbc4a8ce40ba459eeceb54814fe Mon Sep 17 00:00:00 2001 From: Chang She <759245+changhiskhan@users.noreply.github.com> Date: Mon, 8 Jan 2024 21:12:48 -0800 Subject: [PATCH 29/43] feat(node): support table.schema for LocalTable (#789) Close #773 we pass an empty table over IPC so we don't need to manually deal with serde. Then we just return the schema attribute from the empty table. 
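The Node change itself is TypeScript and Rust, but the underlying Arrow trick is easy to see in a few lines of pyarrow (illustrative only; none of this code is part of the patch): an IPC payload with zero rows still carries the full schema, so the JS side can rebuild it with `tableFromIPC` without any manual serde.

```python
import pyarrow as pa

schema = pa.schema([
    pa.field("id", pa.int32()),
    pa.field("vector", pa.list_(pa.float32(), 128)),  # fixed-size list, as in the tests
])

sink = pa.BufferOutputStream()
with pa.ipc.new_stream(sink, schema) as writer:
    writer.write_table(schema.empty_table())          # zero rows, schema only

received = pa.ipc.open_stream(sink.getvalue()).read_all()
assert received.num_rows == 0
assert received.schema == schema                      # schema survives the round trip
```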
--------- Co-authored-by: albertlockett --- node/src/index.ts | 30 ++++++++++++++++++++++++++++-- node/src/test/test.ts | 21 +++++++++++++++++++++ rust/ffi/node/src/arrow.rs | 2 +- rust/ffi/node/src/convert.rs | 20 ++++++++++++++++++++ rust/ffi/node/src/lib.rs | 1 + rust/ffi/node/src/query.rs | 20 +------------------- rust/ffi/node/src/table.rs | 29 ++++++++++++++++++++++++++--- 7 files changed, 98 insertions(+), 25 deletions(-) diff --git a/node/src/index.ts b/node/src/index.ts index 71ab0a94..a5de03eb 100644 --- a/node/src/index.ts +++ b/node/src/index.ts @@ -14,7 +14,8 @@ import { type Schema, - Table as ArrowTable + Table as ArrowTable, + tableFromIPC } from 'apache-arrow' import { createEmptyTable, fromRecordsToBuffer, fromTableToBuffer } from './arrow' import type { EmbeddingFunction } from './embedding/embedding_function' @@ -24,7 +25,7 @@ import { isEmbeddingFunction } from './embedding/embedding_function' import { type Literal, toSQL } from './util' // eslint-disable-next-line @typescript-eslint/no-var-requires -const { databaseNew, databaseTableNames, databaseOpenTable, databaseDropTable, tableCreate, tableAdd, tableCreateScalarIndex, tableCreateVectorIndex, tableCountRows, tableDelete, tableUpdate, tableCleanupOldVersions, tableCompactFiles, tableListIndices, tableIndexStats } = require('../native.js') +const { databaseNew, databaseTableNames, databaseOpenTable, databaseDropTable, tableCreate, tableAdd, tableCreateScalarIndex, tableCreateVectorIndex, tableCountRows, tableDelete, tableUpdate, tableCleanupOldVersions, tableCompactFiles, tableListIndices, tableIndexStats, tableSchema } = require('../native.js') export { Query } export type { EmbeddingFunction } @@ -354,6 +355,8 @@ export interface Table { * Get statistics about an index. */ indexStats: (indexUuid: string) => Promise + + schema: Promise } export interface UpdateArgs { @@ -508,6 +511,7 @@ export class LocalConnection implements Connection { export class LocalTable implements Table { private _tbl: any private readonly _name: string + private readonly _isElectron: boolean private readonly _embeddings?: EmbeddingFunction private readonly _options: () => ConnectionOptions @@ -524,6 +528,7 @@ export class LocalTable implements Table { this._name = name this._embeddings = embeddings this._options = () => options + this._isElectron = this.checkElectron() } get name (): string { @@ -682,6 +687,27 @@ export class LocalTable implements Table { async indexStats (indexUuid: string): Promise { return tableIndexStats.call(this._tbl, indexUuid) } + + get schema (): Promise { + // empty table + return this.getSchema() + } + + private async getSchema (): Promise { + const buffer = await tableSchema.call(this._tbl, this._isElectron) + const table = tableFromIPC(buffer) + return table.schema + } + + // See https://github.com/electron/electron/issues/2288 + private checkElectron (): boolean { + try { + // eslint-disable-next-line no-prototype-builtins + return (process?.versions?.hasOwnProperty('electron') || navigator?.userAgent?.toLowerCase()?.includes(' electron')) + } catch (e) { + return false + } + } } export interface CleanupStats { diff --git a/node/src/test/test.ts b/node/src/test/test.ts index e86d9613..68a0f331 100644 --- a/node/src/test/test.ts +++ b/node/src/test/test.ts @@ -498,6 +498,27 @@ describe('LanceDB client', function () { assert.equal(results.length, 2) }) }) + + describe('when inspecting the schema', function () { + it('should return the schema', async function () { + const uri = await createTestDB() + const db = 
await lancedb.connect(uri) + // the fsl inner field must be named 'item' and be nullable + const expectedSchema = new Schema( + [ + new Field('id', new Int32()), + new Field('vector', new FixedSizeList(128, new Field('item', new Float32(), true))), + new Field('s', new Utf8()) + ] + ) + const table = await db.createTable({ + name: 'some_table', + schema: expectedSchema + }) + const schema = await table.schema + assert.deepEqual(expectedSchema, schema) + }) + }) }) describe('Remote LanceDB client', function () { diff --git a/rust/ffi/node/src/arrow.rs b/rust/ffi/node/src/arrow.rs index 58ca62da..88f6f2b2 100644 --- a/rust/ffi/node/src/arrow.rs +++ b/rust/ffi/node/src/arrow.rs @@ -36,7 +36,7 @@ fn validate_vector_column(record_batch: &RecordBatch) -> Result<()> { pub(crate) fn arrow_buffer_to_record_batch(slice: &[u8]) -> Result<(Vec, SchemaRef)> { let mut batches: Vec = Vec::new(); let file_reader = FileReader::try_new(Cursor::new(slice), None)?; - let schema = file_reader.schema().clone(); + let schema = file_reader.schema(); for b in file_reader { let record_batch = b?; validate_vector_column(&record_batch)?; diff --git a/rust/ffi/node/src/convert.rs b/rust/ffi/node/src/convert.rs index 1f9ef4bd..2736d2ca 100644 --- a/rust/ffi/node/src/convert.rs +++ b/rust/ffi/node/src/convert.rs @@ -13,6 +13,9 @@ // limitations under the License. use neon::prelude::*; +use neon::types::buffer::TypedArray; + +use crate::error::ResultExt; pub(crate) fn vec_str_to_array<'a, C: Context<'a>>( vec: &Vec, @@ -34,3 +37,20 @@ pub(crate) fn js_array_to_vec(array: &JsArray, cx: &mut FunctionContext) -> Vec< } query_vec } + +// Creates a new JsBuffer from a rust buffer with a special logic for electron +pub(crate) fn new_js_buffer<'a>( + buffer: Vec, + cx: &mut TaskContext<'a>, + is_electron: bool, +) -> NeonResult> { + if is_electron { + // Electron does not support `external`: https://github.com/neon-bindings/neon/pull/937 + let mut js_buffer = JsBuffer::new(cx, buffer.len()).or_throw(cx)?; + let buffer_data = js_buffer.as_mut_slice(cx); + buffer_data.copy_from_slice(buffer.as_slice()); + Ok(js_buffer) + } else { + Ok(JsBuffer::external(cx, buffer)) + } +} diff --git a/rust/ffi/node/src/lib.rs b/rust/ffi/node/src/lib.rs index 46539916..b1bc9ca2 100644 --- a/rust/ffi/node/src/lib.rs +++ b/rust/ffi/node/src/lib.rs @@ -250,5 +250,6 @@ fn main(mut cx: ModuleContext) -> NeonResult<()> { "tableCreateVectorIndex", index::vector::table_create_vector_index, )?; + cx.export_function("tableSchema", JsTable::js_schema)?; Ok(()) } diff --git a/rust/ffi/node/src/query.rs b/rust/ffi/node/src/query.rs index f24f437f..6250c6f8 100644 --- a/rust/ffi/node/src/query.rs +++ b/rust/ffi/node/src/query.rs @@ -7,7 +7,6 @@ use lance_linalg::distance::MetricType; use neon::context::FunctionContext; use neon::handle::Handle; use neon::prelude::*; -use neon::types::buffer::TypedArray; use crate::arrow::record_batch_to_buffer; use crate::error::ResultExt; @@ -96,26 +95,9 @@ impl JsQuery { deferred.settle_with(&channel, move |mut cx| { let results = results.or_throw(&mut cx)?; let buffer = record_batch_to_buffer(results).or_throw(&mut cx)?; - Self::new_js_buffer(buffer, &mut cx, is_electron) + convert::new_js_buffer(buffer, &mut cx, is_electron) }); }); Ok(promise) } - - // Creates a new JsBuffer from a rust buffer with a special logic for electron - fn new_js_buffer<'a>( - buffer: Vec, - cx: &mut TaskContext<'a>, - is_electron: bool, - ) -> NeonResult> { - if is_electron { - // Electron does not support `external`: 
https://github.com/neon-bindings/neon/pull/937 - let mut js_buffer = JsBuffer::new(cx, buffer.len()).or_throw(cx)?; - let buffer_data = js_buffer.as_mut_slice(cx); - buffer_data.copy_from_slice(buffer.as_slice()); - Ok(js_buffer) - } else { - Ok(JsBuffer::external(cx, buffer)) - } - } } diff --git a/rust/ffi/node/src/table.rs b/rust/ffi/node/src/table.rs index c4777a4b..fd4b6ef3 100644 --- a/rust/ffi/node/src/table.rs +++ b/rust/ffi/node/src/table.rs @@ -12,18 +12,18 @@ // See the License for the specific language governing permissions and // limitations under the License. -use arrow_array::RecordBatchIterator; +use arrow_array::{RecordBatch, RecordBatchIterator}; use lance::dataset::optimize::CompactionOptions; use lance::dataset::{WriteMode, WriteParams}; use lance::io::object_store::ObjectStoreParams; -use crate::arrow::arrow_buffer_to_record_batch; +use crate::arrow::{arrow_buffer_to_record_batch, record_batch_to_buffer}; use neon::prelude::*; use neon::types::buffer::TypedArray; use vectordb::Table; use crate::error::ResultExt; -use crate::{get_aws_creds, get_aws_region, runtime, JsDatabase}; +use crate::{convert, get_aws_creds, get_aws_region, runtime, JsDatabase}; pub(crate) struct JsTable { pub table: Table, @@ -426,4 +426,27 @@ impl JsTable { Ok(promise) } + + pub(crate) fn js_schema(mut cx: FunctionContext) -> JsResult { + let js_table = cx.this().downcast_or_throw::, _>(&mut cx)?; + let rt = runtime(&mut cx)?; + let (deferred, promise) = cx.promise(); + let channel = cx.channel(); + let table = js_table.table.clone(); + + let is_electron = cx + .argument::(0) + .or_throw(&mut cx)? + .value(&mut cx); + + rt.spawn(async move { + deferred.settle_with(&channel, move |mut cx| { + let schema = table.schema(); + let batches = vec![RecordBatch::new_empty(schema)]; + let buffer = record_batch_to_buffer(batches).or_throw(&mut cx)?; + convert::new_js_buffer(buffer, &mut cx, is_electron) + }) + }); + Ok(promise) + } } From 1dd663fc8a3b26c85fb3038694a9975001001d5d Mon Sep 17 00:00:00 2001 From: Chang She <759245+changhiskhan@users.noreply.github.com> Date: Mon, 8 Jan 2024 21:49:31 -0800 Subject: [PATCH 30/43] chore(python): document phrase queries in fts (#788) closes #769 Add unit test and documentation on using quotes to perform a phrase query --- docs/src/fts.md | 16 ++++++++++++++++ python/tests/test_fts.py | 17 +++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/docs/src/fts.md b/docs/src/fts.md index 721a5cf1..183371fa 100644 --- a/docs/src/fts.md +++ b/docs/src/fts.md @@ -75,6 +75,22 @@ applied on top of the full text search results. This can be invoked via the fami table.search("puppy").limit(10).where("meta='foo'").to_list() ``` +## Syntax + +For full-text search you can perform either a phrase query like "the old man and the sea", +or a structured search query like "(Old AND Man) AND Sea". +Double quotes are used to disambiguate. + +For example: + +If you intended "they could have been dogs OR cats" as a phrase query, this actually +raises a syntax error since `OR` is a recognized operator. If you make `or` lower case, +this avoids the syntax error. However, it is cumbersome to have to remember what will +conflict with the query syntax. Instead, if you search using +`table.search('"they could have been dogs OR cats"')`, then the syntax checker avoids +checking inside the quotes. + + ## Configurations By default, LanceDB configures a 1GB heap size limit for creating the index. 
You can diff --git a/python/tests/test_fts.py b/python/tests/test_fts.py index baa07096..b7c81f61 100644 --- a/python/tests/test_fts.py +++ b/python/tests/test_fts.py @@ -162,3 +162,20 @@ def test_null_input(table): ] ) table.create_fts_index("text") + + +def test_syntax(table): + # https://github.com/lancedb/lancedb/issues/769 + table.create_fts_index("text") + with pytest.raises(ValueError, match="Syntax Error"): + table.search("they could have been dogs OR cats").limit(10).to_list() + # this should work + table.search('"they could have been dogs OR cats"').limit(10).to_list() + # this should work too + table.search('''"the cats OR dogs were not really 'pets' at all"''').limit( + 10 + ).to_list() + with pytest.raises(ValueError, match="Syntax Error"): + table.search('''"the cats OR dogs were not really "pets" at all"''').limit( + 10 + ).to_list() From a07c6c465a793a5c7804454bde4ac98bc59b97d8 Mon Sep 17 00:00:00 2001 From: Chang She <759245+changhiskhan@users.noreply.github.com> Date: Tue, 9 Jan 2024 07:03:29 -0800 Subject: [PATCH 31/43] feat(python): support new style optional syntax (#793) --- python/lancedb/pydantic.py | 13 +++++++++++++ python/tests/test_fts.py | 2 +- python/tests/test_pydantic.py | 22 ++++++++++++++++++++++ 3 files changed, 36 insertions(+), 1 deletion(-) diff --git a/python/lancedb/pydantic.py b/python/lancedb/pydantic.py index 48a67189..859eeaa8 100644 --- a/python/lancedb/pydantic.py +++ b/python/lancedb/pydantic.py @@ -192,6 +192,7 @@ else: def _pydantic_to_arrow_type(field: pydantic.fields.FieldInfo) -> pa.DataType: """Convert a Pydantic FieldInfo to Arrow DataType""" + if isinstance(field.annotation, _GenericAlias) or ( sys.version_info > (3, 9) and isinstance(field.annotation, types.GenericAlias) ): @@ -203,6 +204,13 @@ def _pydantic_to_arrow_type(field: pydantic.fields.FieldInfo) -> pa.DataType: elif origin == Union: if len(args) == 2 and args[1] == type(None): return _py_type_to_arrow_type(args[0], field) + elif sys.version_info >= (3, 10) and isinstance(field.annotation, types.UnionType): + args = field.annotation.__args__ + if len(args) == 2: + for typ in args: + if typ == type(None): + continue + return _py_type_to_arrow_type(typ, field) elif inspect.isclass(field.annotation): if issubclass(field.annotation, pydantic.BaseModel): # Struct @@ -221,6 +229,11 @@ def is_nullable(field: pydantic.fields.FieldInfo) -> bool: if origin == Union: if len(args) == 2 and args[1] == type(None): return True + elif sys.version_info >= (3, 10) and isinstance(field.annotation, types.UnionType): + args = field.annotation.__args__ + for typ in args: + if typ == type(None): + return True return False diff --git a/python/tests/test_fts.py b/python/tests/test_fts.py index b7c81f61..f65dc4ca 100644 --- a/python/tests/test_fts.py +++ b/python/tests/test_fts.py @@ -82,7 +82,7 @@ def test_search_index(tmp_path, table): def test_create_index_from_table(tmp_path, table): table.create_fts_index("text") df = table.search("puppy").limit(10).select(["text"]).to_pandas() - assert len(df) == 10 + assert len(df) <= 10 assert "text" in df.columns # Check whether it can be updated diff --git a/python/tests/test_pydantic.py b/python/tests/test_pydantic.py index 8a3ee16b..c6376dce 100644 --- a/python/tests/test_pydantic.py +++ b/python/tests/test_pydantic.py @@ -88,6 +88,28 @@ def test_pydantic_to_arrow(): assert schema == expect_schema +@pytest.mark.skipif( + sys.version_info < (3, 10), + reason="using | type syntax requires python3.10 or higher", +) +def test_optional_types_py310(): + class 
TestModel(pydantic.BaseModel): + a: str | None + b: None | str + c: Optional[str] + + schema = pydantic_to_schema(TestModel) + + expect_schema = pa.schema( + [ + pa.field("a", pa.utf8(), True), + pa.field("b", pa.utf8(), True), + pa.field("c", pa.utf8(), True), + ] + ) + assert schema == expect_schema + + @pytest.mark.skipif( sys.version_info > (3, 8), reason="using native type alias requires python3.9 or higher", From f3a905af63ddab1889a44d8eb40cc397769d476b Mon Sep 17 00:00:00 2001 From: Chang She <759245+changhiskhan@users.noreply.github.com> Date: Tue, 9 Jan 2024 19:27:38 -0800 Subject: [PATCH 32/43] fix(rust): not sure why clippy is suddenly unhappy (#794) should fix the error on top of main https://github.com/lancedb/lancedb/actions/runs/7457190471/job/20288985725 --- rust/ffi/node/src/arrow.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/ffi/node/src/arrow.rs b/rust/ffi/node/src/arrow.rs index 88f6f2b2..0e1054ce 100644 --- a/rust/ffi/node/src/arrow.rs +++ b/rust/ffi/node/src/arrow.rs @@ -50,7 +50,7 @@ pub(crate) fn record_batch_to_buffer(batches: Vec) -> Result Date: Tue, 9 Jan 2024 19:33:03 -0800 Subject: [PATCH 33/43] feat(python): add count_rows with filter option (#801) Closes #795 --- python/lancedb/table.py | 13 ++++++++++++- python/tests/test_table.py | 11 +++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/python/lancedb/table.py b/python/lancedb/table.py index 7db8a3d1..ae959c8d 100644 --- a/python/lancedb/table.py +++ b/python/lancedb/table.py @@ -646,8 +646,19 @@ class LanceTable(Table): self._dataset.restore() self._reset_dataset() + def count_rows(self, filter: Optional[str] = None) -> int: + """ + Count the number of rows in the table. + + Parameters + ---------- + filter: str, optional + A SQL where clause to filter the rows to count. + """ + return self._dataset.count_rows(filter) + def __len__(self): - return self._dataset.count_rows() + return self.count_rows() def __repr__(self) -> str: return f"LanceTable({self.name})" diff --git a/python/tests/test_table.py b/python/tests/test_table.py index c6d948cf..4c769009 100644 --- a/python/tests/test_table.py +++ b/python/tests/test_table.py @@ -597,3 +597,14 @@ def test_compact_cleanup(db): with pytest.raises(Exception, match="Version 3 no longer exists"): table.checkout(3) + + +def test_count_rows(db): + table = LanceTable.create( + db, + "my_table", + data=[{"text": "foo", "id": 0}, {"text": "bar", "id": 1}], + ) + assert len(table) == 2 + assert table.count_rows() == 2 + assert table.count_rows(filter="text='bar'") == 1 From 881dfa022b40a1aa3e930c5183b685e23d377c93 Mon Sep 17 00:00:00 2001 From: Chang She <759245+changhiskhan@users.noreply.github.com> Date: Tue, 9 Jan 2024 19:41:31 -0800 Subject: [PATCH 34/43] feat(python): add phrase query option for fts (#798) addresses #797 Problem: tantivy does not expose option to explicitly Proposed solution here: 1. Add a `.phrase_query()` option 2. Under the hood, LanceDB takes care of wrapping the input in quotes and replace nested double quotes with single quotes I've also filed an upstream issue, if they support phrase queries natively then we can get rid of our manual custom processing here. 
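For illustration, a minimal sketch of the intended usage, assuming an existing table with a `text` column (the connection path and table name below are hypothetical; the `phrase_query()` behavior follows the diff in this patch):

```python
import lancedb

# Hypothetical local database and table; any table with a "text" column works.
db = lancedb.connect("/tmp/lancedb")
table = db.open_table("my_table")
table.create_fts_index("text")

# Plain search: "OR" is parsed as an operator, so this raises a Syntax Error.
# table.search("they could have been dogs OR cats").limit(10).to_list()

# With phrase_query(), LanceDB wraps the query in double quotes (converting any
# nested double quotes to single quotes) so it is matched as a literal phrase.
results = (
    table.search("they could have been dogs OR cats")
    .phrase_query()
    .limit(10)
    .to_list()
)
```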
--- python/lancedb/query.py | 24 +++++++++++++++++++++++- python/tests/test_fts.py | 11 +++++++---- 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/python/lancedb/query.py b/python/lancedb/query.py index dcc72c58..532566da 100644 --- a/python/lancedb/query.py +++ b/python/lancedb/query.py @@ -468,6 +468,24 @@ class LanceFtsQueryBuilder(LanceQueryBuilder): def __init__(self, table: "lancedb.table.Table", query: str): super().__init__(table) self._query = query + self._phrase_query = False + + def phrase_query(self, phrase_query: bool = True) -> LanceFtsQueryBuilder: + """Set whether to use phrase query. + + Parameters + ---------- + phrase_query: bool, default True + If True, then the query will be wrapped in quotes and + double quotes replaced by single quotes. + + Returns + ------- + LanceFtsQueryBuilder + The LanceFtsQueryBuilder object. + """ + self._phrase_query = phrase_query + return self def to_arrow(self) -> pa.Table: try: @@ -490,7 +508,11 @@ class LanceFtsQueryBuilder(LanceQueryBuilder): # open the index index = tantivy.Index.open(index_path) # get the scores and doc ids - row_ids, scores = search_index(index, self._query, self._limit) + query = self._query + if self._phrase_query: + query = query.replace('"', "'") + query = f'"{query}"' + row_ids, scores = search_index(index, query, self._limit) if len(row_ids) == 0: empty_schema = pa.schema([pa.field("score", pa.float32())]) return pa.Table.from_pylist([], schema=empty_schema) diff --git a/python/tests/test_fts.py b/python/tests/test_fts.py index f65dc4ca..a62b1b2e 100644 --- a/python/tests/test_fts.py +++ b/python/tests/test_fts.py @@ -169,13 +169,16 @@ def test_syntax(table): table.create_fts_index("text") with pytest.raises(ValueError, match="Syntax Error"): table.search("they could have been dogs OR cats").limit(10).to_list() + table.search("they could have been dogs OR cats").phrase_query().limit(10).to_list() # this should work table.search('"they could have been dogs OR cats"').limit(10).to_list() # this should work too table.search('''"the cats OR dogs were not really 'pets' at all"''').limit( 10 ).to_list() - with pytest.raises(ValueError, match="Syntax Error"): - table.search('''"the cats OR dogs were not really "pets" at all"''').limit( - 10 - ).to_list() + table.search('the cats OR dogs were not really "pets" at all').phrase_query().limit( + 10 + ).to_list() + table.search('the cats OR dogs were not really "pets" at all').phrase_query().limit( + 10 + ).to_list() From 7581cbb38fb0777530874d542bc9682824d55034 Mon Sep 17 00:00:00 2001 From: Chang She <759245+changhiskhan@users.noreply.github.com> Date: Tue, 9 Jan 2024 20:20:13 -0800 Subject: [PATCH 35/43] chore(python): add docstring for limit behavior (#800) Closes #796 --- python/lancedb/query.py | 14 ++++++++++++-- python/tests/test_table.py | 8 ++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/python/lancedb/query.py b/python/lancedb/query.py index 532566da..886ec966 100644 --- a/python/lancedb/query.py +++ b/python/lancedb/query.py @@ -260,20 +260,30 @@ class LanceQueryBuilder(ABC): for row in self.to_arrow().to_pylist() ] - def limit(self, limit: int) -> LanceQueryBuilder: + def limit(self, limit: Union[int, None]) -> LanceQueryBuilder: """Set the maximum number of results to return. Parameters ---------- limit: int The maximum number of results to return. + By default the query is limited to the first 10. + Call this method and pass 0, a negative value, + or None to remove the limit. 
+ *WARNING* if you have a large dataset, removing + the limit can potentially result in reading a + large amount of data into memory and cause + out of memory issues. Returns ------- LanceQueryBuilder The LanceQueryBuilder object. """ - self._limit = limit + if limit is None or limit <= 0: + self._limit = None + else: + self._limit = limit return self def select(self, columns: list) -> LanceQueryBuilder: diff --git a/python/tests/test_table.py b/python/tests/test_table.py index 4c769009..3f096a53 100644 --- a/python/tests/test_table.py +++ b/python/tests/test_table.py @@ -569,6 +569,14 @@ def test_empty_query(db): val = df.id.iloc[0] assert val == 1 + table = LanceTable.create(db, "my_table2", data=[{"id": i} for i in range(100)]) + df = table.search().select(["id"]).to_pandas() + assert len(df) == 10 + df = table.search().select(["id"]).limit(None).to_pandas() + assert len(df) == 100 + df = table.search().select(["id"]).limit(-1).to_pandas() + assert len(df) == 100 + def test_compact_cleanup(db): table = LanceTable.create( From 4aa7f58a0798adf87a1ee4c16cd50c54e9c4ddfe Mon Sep 17 00:00:00 2001 From: Sebastian Law Date: Tue, 9 Jan 2024 21:07:50 -0800 Subject: [PATCH 36/43] use requests instead of aiohttp for underlying http client (#803) instead of starting and stopping the current thread's event loop on every http call, just make an http call. --- python/lancedb/remote/client.py | 77 ++++++++++++++++----------------- python/lancedb/remote/db.py | 36 ++++++--------- python/lancedb/remote/table.py | 43 ++++++++---------- python/pyproject.toml | 3 +- python/tests/test_remote_db.py | 6 +-- 5 files changed, 72 insertions(+), 93 deletions(-) diff --git a/python/lancedb/remote/client.py b/python/lancedb/remote/client.py index 30b68e93..6f9bf292 100644 --- a/python/lancedb/remote/client.py +++ b/python/lancedb/remote/client.py @@ -13,9 +13,10 @@ import functools -from typing import Any, Callable, Dict, Iterable, Optional, Union +from typing import Any, Callable, Dict, Iterable, List, Optional, Union +from urllib.parse import urljoin -import aiohttp +import requests import attrs import pyarrow as pa from pydantic import BaseModel @@ -37,8 +38,8 @@ def _check_not_closed(f): return wrapped -async def _read_ipc(resp: aiohttp.ClientResponse) -> pa.Table: - resp_body = await resp.read() +def _read_ipc(resp: requests.Response) -> pa.Table: + resp_body = resp.content with pa.ipc.open_file(pa.BufferReader(resp_body)) as reader: return reader.read_all() @@ -53,15 +54,18 @@ class RestfulLanceDBClient: closed: bool = attrs.field(default=False, init=False) @functools.cached_property - def session(self) -> aiohttp.ClientSession: - url = ( + def session(self) -> requests.Session: + return requests.Session() + + @property + def url(self) -> str: + return ( self.host_override or f"https://{self.db_name}.{self.region}.api.lancedb.com" ) - return aiohttp.ClientSession(url) - async def close(self): - await self.session.close() + def close(self): + self.session.close() self.closed = True @functools.cached_property @@ -76,38 +80,38 @@ class RestfulLanceDBClient: return headers @staticmethod - async def _check_status(resp: aiohttp.ClientResponse): - if resp.status == 404: - raise LanceDBClientError(f"Not found: {await resp.text()}") - elif 400 <= resp.status < 500: + def _check_status(resp: requests.Response): + if resp.status_code == 404: + raise LanceDBClientError(f"Not found: {resp.text}") + elif 400 <= resp.status_code < 500: raise LanceDBClientError( - f"Bad Request: {resp.status}, error: {await resp.text()}" + 
f"Bad Request: {resp.status_code}, error: {resp.text}" ) - elif 500 <= resp.status < 600: + elif 500 <= resp.status_code < 600: raise LanceDBClientError( - f"Internal Server Error: {resp.status}, error: {await resp.text()}" + f"Internal Server Error: {resp.status_code}, error: {resp.text}" ) - elif resp.status != 200: + elif resp.status_code != 200: raise LanceDBClientError( - f"Unknown Error: {resp.status}, error: {await resp.text()}" + f"Unknown Error: {resp.status_code}, error: {resp.text}" ) @_check_not_closed - async def get(self, uri: str, params: Union[Dict[str, Any], BaseModel] = None): + def get(self, uri: str, params: Union[Dict[str, Any], BaseModel] = None): """Send a GET request and returns the deserialized response payload.""" if isinstance(params, BaseModel): params: Dict[str, Any] = params.dict(exclude_none=True) - async with self.session.get( - uri, + with self.session.get( + urljoin(self.url, uri), params=params, headers=self.headers, - timeout=aiohttp.ClientTimeout(total=30), + timeout=(5.0, 30.0), ) as resp: - await self._check_status(resp) - return await resp.json() + self._check_status(resp) + return resp.json() @_check_not_closed - async def post( + def post( self, uri: str, data: Optional[Union[Dict[str, Any], BaseModel, bytes]] = None, @@ -139,31 +143,26 @@ class RestfulLanceDBClient: headers["content-type"] = content_type if request_id is not None: headers["x-request-id"] = request_id - async with self.session.post( - uri, + with self.session.post( + urljoin(self.url, uri), headers=headers, params=params, - timeout=aiohttp.ClientTimeout(total=30), + timeout=(5.0, 30.0), **req_kwargs, ) as resp: - resp: aiohttp.ClientResponse = resp - await self._check_status(resp) - return await deserialize(resp) + self._check_status(resp) + return deserialize(resp) @_check_not_closed - async def list_tables( - self, limit: int, page_token: Optional[str] = None - ) -> Iterable[str]: + def list_tables(self, limit: int, page_token: Optional[str] = None) -> List[str]: """List all tables in the database.""" if page_token is None: page_token = "" - json = await self.get("/v1/table/", {"limit": limit, "page_token": page_token}) + json = self.get("/v1/table/", {"limit": limit, "page_token": page_token}) return json["tables"] @_check_not_closed - async def query(self, table_name: str, query: VectorQuery) -> VectorQueryResult: + def query(self, table_name: str, query: VectorQuery) -> VectorQueryResult: """Query a table.""" - tbl = await self.post( - f"/v1/table/{table_name}/query/", query, deserialize=_read_ipc - ) + tbl = self.post(f"/v1/table/{table_name}/query/", query, deserialize=_read_ipc) return VectorQueryResult(tbl) diff --git a/python/lancedb/remote/db.py b/python/lancedb/remote/db.py index 0c8be4ca..337406db 100644 --- a/python/lancedb/remote/db.py +++ b/python/lancedb/remote/db.py @@ -50,10 +50,6 @@ class RemoteDBConnection(DBConnection): self._client = RestfulLanceDBClient( self.db_name, region, api_key, host_override ) - try: - self._loop = asyncio.get_running_loop() - except RuntimeError: - self._loop = asyncio.get_event_loop() def __repr__(self) -> str: return f"RemoteConnect(name={self.db_name})" @@ -76,9 +72,8 @@ class RemoteDBConnection(DBConnection): An iterator of table names. 
""" while True: - result = self._loop.run_until_complete( - self._client.list_tables(limit, page_token) - ) + result = self._client.list_tables(limit, page_token) + if len(result) > 0: page_token = result[len(result) - 1] else: @@ -103,9 +98,7 @@ class RemoteDBConnection(DBConnection): # check if table exists try: - self._loop.run_until_complete( - self._client.post(f"/v1/table/{name}/describe/") - ) + self._client.post(f"/v1/table/{name}/describe/") except LanceDBClientError as err: if str(err).startswith("Not found"): logging.error( @@ -248,14 +241,13 @@ class RemoteDBConnection(DBConnection): data = to_ipc_binary(data) request_id = uuid.uuid4().hex - self._loop.run_until_complete( - self._client.post( - f"/v1/table/{name}/create/", - data=data, - request_id=request_id, - content_type=ARROW_STREAM_CONTENT_TYPE, - ) + self._client.post( + f"/v1/table/{name}/create/", + data=data, + request_id=request_id, + content_type=ARROW_STREAM_CONTENT_TYPE, ) + return RemoteTable(self, name) @override @@ -267,13 +259,11 @@ class RemoteDBConnection(DBConnection): name: str The name of the table. """ - self._loop.run_until_complete( - self._client.post( - f"/v1/table/{name}/drop/", - ) + + self._client.post( + f"/v1/table/{name}/drop/", ) async def close(self): """Close the connection to the database.""" - self._loop.close() - await self._client.close() + self._client.close() diff --git a/python/lancedb/remote/table.py b/python/lancedb/remote/table.py index e09011a7..63572ebb 100644 --- a/python/lancedb/remote/table.py +++ b/python/lancedb/remote/table.py @@ -43,18 +43,14 @@ class RemoteTable(Table): of this Table """ - resp = self._conn._loop.run_until_complete( - self._conn._client.post(f"/v1/table/{self._name}/describe/") - ) + resp = self._conn._client.post(f"/v1/table/{self._name}/describe/") schema = json_to_schema(resp["schema"]) return schema @property def version(self) -> int: """Get the current version of the table""" - resp = self._conn._loop.run_until_complete( - self._conn._client.post(f"/v1/table/{self._name}/describe/") - ) + resp = self._conn._client.post(f"/v1/table/{self._name}/describe/") return resp["version"] def to_arrow(self) -> pa.Table: @@ -116,9 +112,10 @@ class RemoteTable(Table): "metric_type": metric, "index_cache_size": index_cache_size, } - resp = self._conn._loop.run_until_complete( - self._conn._client.post(f"/v1/table/{self._name}/create_index/", data=data) + resp = self._conn._client.post( + f"/v1/table/{self._name}/create_index/", data=data ) + return resp def add( @@ -161,13 +158,11 @@ class RemoteTable(Table): request_id = uuid.uuid4().hex - self._conn._loop.run_until_complete( - self._conn._client.post( - f"/v1/table/{self._name}/insert/", - data=payload, - params={"request_id": request_id, "mode": mode}, - content_type=ARROW_STREAM_CONTENT_TYPE, - ) + self._conn._client.post( + f"/v1/table/{self._name}/insert/", + data=payload, + params={"request_id": request_id, "mode": mode}, + content_type=ARROW_STREAM_CONTENT_TYPE, ) def search( @@ -233,19 +228,19 @@ class RemoteTable(Table): and len(query.vector) > 0 and not isinstance(query.vector[0], float) ): - futures = [] + results = [] for v in query.vector: v = list(v) q = query.copy() q.vector = v - futures.append(self._conn._client.query(self._name, q)) - result = self._conn._loop.run_until_complete(asyncio.gather(*futures)) + results.append(self._conn._client.query(self._name, q)) + return pa.concat_tables( - [add_index(r.to_arrow(), i) for i, r in enumerate(result)] + [add_index(r.to_arrow(), i) for i, r in 
enumerate(results)] ) else: result = self._conn._client.query(self._name, query) - return self._conn._loop.run_until_complete(result).to_arrow() + return result.to_arrow() def delete(self, predicate: str): """Delete rows from the table. @@ -294,9 +289,7 @@ class RemoteTable(Table): 0 2 [3.0, 4.0] 85.0 # doctest: +SKIP """ payload = {"predicate": predicate} - self._conn._loop.run_until_complete( - self._conn._client.post(f"/v1/table/{self._name}/delete/", data=payload) - ) + self._conn._client.post(f"/v1/table/{self._name}/delete/", data=payload) def update( self, @@ -356,9 +349,7 @@ class RemoteTable(Table): updates = [[k, v] for k, v in values_sql.items()] payload = {"predicate": where, "updates": updates} - self._conn._loop.run_until_complete( - self._conn._client.post(f"/v1/table/{self._name}/update/", data=payload) - ) + self._conn._client.post(f"/v1/table/{self._name}/update/", data=payload) def add_index(tbl: pa.Table, i: int) -> pa.Table: diff --git a/python/pyproject.toml b/python/pyproject.toml index 71e532d3..f1f9cc26 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -7,7 +7,6 @@ dependencies = [ "ratelimiter~=1.0", "retry>=0.9.2", "tqdm>=4.27.0", - "aiohttp", "pydantic>=1.10", "attrs>=21.3.0", "semver>=3.0", @@ -46,7 +45,7 @@ classifiers = [ repository = "https://github.com/lancedb/lancedb" [project.optional-dependencies] -tests = ["pandas>=1.4", "pytest", "pytest-mock", "pytest-asyncio", "requests", "duckdb", "pytz"] +tests = ["aiohttp", "pandas>=1.4", "pytest", "pytest-mock", "pytest-asyncio", "duckdb", "pytz"] dev = ["ruff", "pre-commit", "black"] docs = ["mkdocs", "mkdocs-jupyter", "mkdocs-material", "mkdocstrings[python]"] clip = ["torch", "pillow", "open-clip"] diff --git a/python/tests/test_remote_db.py b/python/tests/test_remote_db.py index 00ee8c43..d4928c6a 100644 --- a/python/tests/test_remote_db.py +++ b/python/tests/test_remote_db.py @@ -18,15 +18,15 @@ from lancedb.remote.client import VectorQuery, VectorQueryResult class FakeLanceDBClient: - async def close(self): + def close(self): pass - async def query(self, table_name: str, query: VectorQuery) -> VectorQueryResult: + def query(self, table_name: str, query: VectorQuery) -> VectorQueryResult: assert table_name == "test" t = pa.schema([]).empty_table() return VectorQueryResult(t) - async def post(self, path: str): + def post(self, path: str): pass From 4b243c5ff848c12443edb57c5eaa8bcdef47561c Mon Sep 17 00:00:00 2001 From: Chang She <759245+changhiskhan@users.noreply.github.com> Date: Wed, 10 Jan 2024 16:44:00 -0800 Subject: [PATCH 37/43] feat(node): align incoming data to table schema (#802) --- node/package-lock.json | 520 +++++++++++++++++------------------------ node/package.json | 4 +- node/src/arrow.ts | 53 ++++- node/src/index.ts | 7 +- node/src/test/test.ts | 39 ++++ 5 files changed, 307 insertions(+), 316 deletions(-) diff --git a/node/package-lock.json b/node/package-lock.json index 541662fb..1373fb99 100644 --- a/node/package-lock.json +++ b/node/package-lock.json @@ -18,9 +18,9 @@ "win32" ], "dependencies": { - "@apache-arrow/ts": "^12.0.0", + "@apache-arrow/ts": "^14.0.2", "@neon-rs/load": "^0.0.74", - "apache-arrow": "^12.0.0", + "apache-arrow": "^14.0.2", "axios": "^1.4.0" }, "devDependencies": { @@ -60,32 +60,52 @@ "@lancedb/vectordb-win32-x64-msvc": "0.4.2" } }, + "node_modules/@75lb/deep-merge": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@75lb/deep-merge/-/deep-merge-1.1.1.tgz", + "integrity": 
"sha512-xvgv6pkMGBA6GwdyJbNAnDmfAIR/DfWhrj9jgWh3TY7gRm3KO46x/GPjRg6wJ0nOepwqrNxFfojebh0Df4h4Tw==", + "dependencies": { + "lodash.assignwith": "^4.2.0", + "typical": "^7.1.1" + }, + "engines": { + "node": ">=12.17" + } + }, + "node_modules/@75lb/deep-merge/node_modules/typical": { + "version": "7.1.1", + "resolved": "https://registry.npmjs.org/typical/-/typical-7.1.1.tgz", + "integrity": "sha512-T+tKVNs6Wu7IWiAce5BgMd7OZfNYUndHwc5MknN+UHOudi7sGZzuHdCadllRuqJ3fPtgFtIH9+lt9qRv6lmpfA==", + "engines": { + "node": ">=12.17" + } + }, "node_modules/@apache-arrow/ts": { - "version": "12.0.0", - "resolved": "https://registry.npmjs.org/@apache-arrow/ts/-/ts-12.0.0.tgz", - "integrity": "sha512-ArJ3Fw5W9RAeNWuyCU2CdjL/nEAZSVDG1p3jz/ZtLo/q3NTz2w7HUCOJeszejH/5alGX+QirYrJ5c6BW++/P7g==", + "version": "14.0.2", + "resolved": "https://registry.npmjs.org/@apache-arrow/ts/-/ts-14.0.2.tgz", + "integrity": "sha512-CtwAvLkK0CZv7xsYeCo91ml6PvlfzAmAJZkRYuz2GNBwfYufj5SVi0iuSMwIMkcU/szVwvLdzORSLa5PlF/2ug==", "dependencies": { "@types/command-line-args": "5.2.0", "@types/command-line-usage": "5.0.2", - "@types/node": "18.14.5", + "@types/node": "20.3.0", "@types/pad-left": "2.1.1", "command-line-args": "5.2.1", - "command-line-usage": "6.1.3", - "flatbuffers": "23.3.3", + "command-line-usage": "7.0.1", + "flatbuffers": "23.5.26", "json-bignum": "^0.0.3", "pad-left": "^2.1.0", - "tslib": "^2.5.0" + "tslib": "^2.5.3" } }, "node_modules/@apache-arrow/ts/node_modules/@types/node": { - "version": "18.14.5", - "resolved": "https://registry.npmjs.org/@types/node/-/node-18.14.5.tgz", - "integrity": "sha512-CRT4tMK/DHYhw1fcCEBwME9CSaZNclxfzVMe7GsO6ULSwsttbj70wSiX6rZdIjGblu93sTJxLdhNIT85KKI7Qw==" + "version": "20.3.0", + "resolved": "https://registry.npmjs.org/@types/node/-/node-20.3.0.tgz", + "integrity": "sha512-cumHmIAf6On83X7yP+LrsEyUOf/YlociZelmpRYaGFydoaPdxdt80MAbu6vWerQT2COCp2nPvHdsbD7tHn/YlQ==" }, "node_modules/@apache-arrow/ts/node_modules/tslib": { - "version": "2.5.0", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.0.tgz", - "integrity": "sha512-336iVw3rtn2BUK7ORdIAHTyxHGRIHVReokCR3XjbckJMK7ms8FysBfhLR8IXnAgy7T0PTPNBWKiH514FOW/WSg==" + "version": "2.6.2", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.6.2.tgz", + "integrity": "sha512-AEYxH93jGFPn/a2iVAwW87VuUIkR1FVUKB77NwMF7nBTDkDrrT/Hpt/IrCJ0QXhW27jTBDcf5ZY7w6RiqTMw2Q==" }, "node_modules/@cargo-messages/android-arm-eabi": { "version": "0.0.160", @@ -866,7 +886,6 @@ "version": "4.3.0", "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", - "dev": true, "dependencies": { "color-convert": "^2.0.1" }, @@ -891,34 +910,34 @@ } }, "node_modules/apache-arrow": { - "version": "12.0.0", - "resolved": "https://registry.npmjs.org/apache-arrow/-/apache-arrow-12.0.0.tgz", - "integrity": "sha512-uI+hnZZsGfNJiR/wG8j5yPQuDjmOHx4hZpkA743G4x3TlFrCpA3MMX7KUkIOIw0e/CwZ8NYuaMzaQsblA47qVA==", + "version": "14.0.2", + "resolved": "https://registry.npmjs.org/apache-arrow/-/apache-arrow-14.0.2.tgz", + "integrity": "sha512-EBO2xJN36/XoY81nhLcwCJgFwkboDZeyNQ+OPsG7bCoQjc2BT0aTyH/MR6SrL+LirSNz+cYqjGRlupMMlP1aEg==", "dependencies": { "@types/command-line-args": "5.2.0", "@types/command-line-usage": "5.0.2", - "@types/node": "18.14.5", + "@types/node": "20.3.0", "@types/pad-left": "2.1.1", "command-line-args": "5.2.1", - "command-line-usage": "6.1.3", - "flatbuffers": "23.3.3", + "command-line-usage": "7.0.1", + "flatbuffers": "23.5.26", 
"json-bignum": "^0.0.3", "pad-left": "^2.1.0", - "tslib": "^2.5.0" + "tslib": "^2.5.3" }, "bin": { "arrow2csv": "bin/arrow2csv.js" } }, "node_modules/apache-arrow/node_modules/@types/node": { - "version": "18.14.5", - "resolved": "https://registry.npmjs.org/@types/node/-/node-18.14.5.tgz", - "integrity": "sha512-CRT4tMK/DHYhw1fcCEBwME9CSaZNclxfzVMe7GsO6ULSwsttbj70wSiX6rZdIjGblu93sTJxLdhNIT85KKI7Qw==" + "version": "20.3.0", + "resolved": "https://registry.npmjs.org/@types/node/-/node-20.3.0.tgz", + "integrity": "sha512-cumHmIAf6On83X7yP+LrsEyUOf/YlociZelmpRYaGFydoaPdxdt80MAbu6vWerQT2COCp2nPvHdsbD7tHn/YlQ==" }, "node_modules/apache-arrow/node_modules/tslib": { - "version": "2.5.0", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.0.tgz", - "integrity": "sha512-336iVw3rtn2BUK7ORdIAHTyxHGRIHVReokCR3XjbckJMK7ms8FysBfhLR8IXnAgy7T0PTPNBWKiH514FOW/WSg==" + "version": "2.6.2", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.6.2.tgz", + "integrity": "sha512-AEYxH93jGFPn/a2iVAwW87VuUIkR1FVUKB77NwMF7nBTDkDrrT/Hpt/IrCJ0QXhW27jTBDcf5ZY7w6RiqTMw2Q==" }, "node_modules/arg": { "version": "4.1.3", @@ -1170,7 +1189,6 @@ "version": "4.1.2", "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz", "integrity": "sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==", - "dev": true, "dependencies": { "ansi-styles": "^4.1.0", "supports-color": "^7.1.0" @@ -1182,11 +1200,24 @@ "url": "https://github.com/chalk/chalk?sponsor=1" } }, + "node_modules/chalk-template": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/chalk-template/-/chalk-template-0.4.0.tgz", + "integrity": "sha512-/ghrgmhfY8RaSdeo43hNXxpoHAtxdbskUHjPpfqUWGttFgycUhYPGx3YZBCnUCvOa7Doivn1IZec3DEGFoMgLg==", + "dependencies": { + "chalk": "^4.1.2" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/chalk-template?sponsor=1" + } + }, "node_modules/chalk/node_modules/supports-color": { "version": "7.2.0", "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==", - "dev": true, "dependencies": { "has-flag": "^4.0.0" }, @@ -1245,7 +1276,6 @@ "version": "2.0.1", "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", - "dev": true, "dependencies": { "color-name": "~1.1.4" }, @@ -1256,8 +1286,7 @@ "node_modules/color-name": { "version": "1.1.4", "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", - "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", - "dev": true + "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==" }, "node_modules/combined-stream": { "version": "1.0.8", @@ -1285,97 +1314,33 @@ } }, "node_modules/command-line-usage": { - "version": "6.1.3", - "resolved": "https://registry.npmjs.org/command-line-usage/-/command-line-usage-6.1.3.tgz", - "integrity": "sha512-sH5ZSPr+7UStsloltmDh7Ce5fb8XPlHyoPzTpyyMuYCtervL65+ubVZ6Q61cFtFl62UyJlc8/JwERRbAFPUqgw==", + "version": "7.0.1", + "resolved": "https://registry.npmjs.org/command-line-usage/-/command-line-usage-7.0.1.tgz", + "integrity": "sha512-NCyznE//MuTjwi3y84QVUGEOT+P5oto1e1Pk/jFPVdPPfsG03qpTIl3yw6etR+v73d0lXsoojRpvbru2sqePxQ==", "dependencies": { - "array-back": 
"^4.0.2", - "chalk": "^2.4.2", - "table-layout": "^1.0.2", - "typical": "^5.2.0" + "array-back": "^6.2.2", + "chalk-template": "^0.4.0", + "table-layout": "^3.0.0", + "typical": "^7.1.1" }, "engines": { - "node": ">=8.0.0" - } - }, - "node_modules/command-line-usage/node_modules/ansi-styles": { - "version": "3.2.1", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-3.2.1.tgz", - "integrity": "sha512-VT0ZI6kZRdTh8YyJw3SMbYm/u+NqfsAxEpWO0Pf9sq8/e94WxxOpPKx9FR1FlyCtOVDNOQ+8ntlqFxiRc+r5qA==", - "dependencies": { - "color-convert": "^1.9.0" - }, - "engines": { - "node": ">=4" + "node": ">=12.20.0" } }, "node_modules/command-line-usage/node_modules/array-back": { - "version": "4.0.2", - "resolved": "https://registry.npmjs.org/array-back/-/array-back-4.0.2.tgz", - "integrity": "sha512-NbdMezxqf94cnNfWLL7V/im0Ub+Anbb0IoZhvzie8+4HJ4nMQuzHuy49FkGYCJK2yAloZ3meiB6AVMClbrI1vg==", + "version": "6.2.2", + "resolved": "https://registry.npmjs.org/array-back/-/array-back-6.2.2.tgz", + "integrity": "sha512-gUAZ7HPyb4SJczXAMUXMGAvI976JoK3qEx9v1FTmeYuJj0IBiaKttG1ydtGKdkfqWkIkouke7nG8ufGy77+Cvw==", "engines": { - "node": ">=8" - } - }, - "node_modules/command-line-usage/node_modules/chalk": { - "version": "2.4.2", - "resolved": "https://registry.npmjs.org/chalk/-/chalk-2.4.2.tgz", - "integrity": "sha512-Mti+f9lpJNcwF4tWV8/OrTTtF1gZi+f8FqlyAdouralcFWFQWF2+NgCHShjkCb+IFBLq9buZwE1xckQU4peSuQ==", - "dependencies": { - "ansi-styles": "^3.2.1", - "escape-string-regexp": "^1.0.5", - "supports-color": "^5.3.0" - }, - "engines": { - "node": ">=4" - } - }, - "node_modules/command-line-usage/node_modules/color-convert": { - "version": "1.9.3", - "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-1.9.3.tgz", - "integrity": "sha512-QfAUtd+vFdAtFQcC8CCyYt1fYWxSqAiK2cSD6zDB8N3cpsEBAvRxp9zOGg6G/SHHJYAT88/az/IuDGALsNVbGg==", - "dependencies": { - "color-name": "1.1.3" - } - }, - "node_modules/command-line-usage/node_modules/color-name": { - "version": "1.1.3", - "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.3.tgz", - "integrity": "sha512-72fSenhMw2HZMTVHeCA9KCmpEIbzWiQsjN+BHcBbS9vr1mtt+vJjPdksIBNUmKAW8TFUDPJK5SUU3QhE9NEXDw==" - }, - "node_modules/command-line-usage/node_modules/escape-string-regexp": { - "version": "1.0.5", - "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz", - "integrity": "sha512-vbRorB5FUQWvla16U8R/qgaFIya2qGzwDrNmCZuYKrbdSUMG6I1ZCGQRefkRVhuOkIGVne7BQ35DSfo1qvJqFg==", - "engines": { - "node": ">=0.8.0" - } - }, - "node_modules/command-line-usage/node_modules/has-flag": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-3.0.0.tgz", - "integrity": "sha512-sKJf1+ceQBr4SMkvQnBDNDtf4TXpVhVGateu0t918bl30FnbE2m4vNLX+VWe/dpjlb+HugGYzW7uQXH98HPEYw==", - "engines": { - "node": ">=4" - } - }, - "node_modules/command-line-usage/node_modules/supports-color": { - "version": "5.5.0", - "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-5.5.0.tgz", - "integrity": "sha512-QjVjwdXIt408MIiAqCX4oUKsgU2EqAGzs2Ppkm4aQYbjm+ZEWEcW4SfFNTr4uMNZma0ey4f5lgLrkB0aX0QMow==", - "dependencies": { - "has-flag": "^3.0.0" - }, - "engines": { - "node": ">=4" + "node": ">=12.17" } }, "node_modules/command-line-usage/node_modules/typical": { - "version": "5.2.0", - "resolved": "https://registry.npmjs.org/typical/-/typical-5.2.0.tgz", - "integrity": "sha512-dvdQgNDNJo+8B2uBQoqdb11eUCE1JQXhvjC/CZtgvZseVd5TYMXnq0+vuUemXbd/Se29cTaUuPX3YIc2xgbvIg==", + "version": "7.1.1", + "resolved": 
"https://registry.npmjs.org/typical/-/typical-7.1.1.tgz", + "integrity": "sha512-T+tKVNs6Wu7IWiAce5BgMd7OZfNYUndHwc5MknN+UHOudi7sGZzuHdCadllRuqJ3fPtgFtIH9+lt9qRv6lmpfA==", "engines": { - "node": ">=8" + "node": ">=12.17" } }, "node_modules/concat-map": { @@ -1451,14 +1416,6 @@ "node": ">=6" } }, - "node_modules/deep-extend": { - "version": "0.6.0", - "resolved": "https://registry.npmjs.org/deep-extend/-/deep-extend-0.6.0.tgz", - "integrity": "sha512-LOHxIOaPYdHlJRtCQfDIVZtfw/ufM8+rVj649RIHzcm/vGwQRXFt6OPqIFWsm2XEMrNIEtWR64sY1LEKD2vAOA==", - "engines": { - "node": ">=4.0.0" - } - }, "node_modules/deep-is": { "version": "0.1.4", "resolved": "https://registry.npmjs.org/deep-is/-/deep-is-0.1.4.tgz", @@ -2237,9 +2194,9 @@ } }, "node_modules/flatbuffers": { - "version": "23.3.3", - "resolved": "https://registry.npmjs.org/flatbuffers/-/flatbuffers-23.3.3.tgz", - "integrity": "sha512-jmreOaAT1t55keaf+Z259Tvh8tR/Srry9K8dgCgvizhKSEr6gLGgaOJI2WFL5fkOpGOGRZwxUrlFn0GCmXUy6g==" + "version": "23.5.26", + "resolved": "https://registry.npmjs.org/flatbuffers/-/flatbuffers-23.5.26.tgz", + "integrity": "sha512-vE+SI9vrJDwi1oETtTIFldC/o9GsVKRM+s6EL0nQgxXlYV1Vc4Tk30hj4xGICftInKQKj1F3up2n8UbIVobISQ==" }, "node_modules/flatted": { "version": "3.2.7", @@ -2535,7 +2492,6 @@ "version": "4.0.0", "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==", - "dev": true, "engines": { "node": ">=8" } @@ -3048,6 +3004,11 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/lodash.assignwith": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/lodash.assignwith/-/lodash.assignwith-4.2.0.tgz", + "integrity": "sha512-ZznplvbvtjK2gMvnQ1BR/zqPFZmS6jbK4p+6Up4xcRYA7yMIwxHCfbTcrYxXKzzqLsQ05eJPVznEW3tuwV7k1g==" + }, "node_modules/lodash.camelcase": { "version": "4.3.0", "resolved": "https://registry.npmjs.org/lodash.camelcase/-/lodash.camelcase-4.3.0.tgz", @@ -3668,14 +3629,6 @@ "node": ">=8.10.0" } }, - "node_modules/reduce-flatten": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/reduce-flatten/-/reduce-flatten-2.0.0.tgz", - "integrity": "sha512-EJ4UNY/U1t2P/2k6oqotuX2Cc3T6nxJwsM0N0asT7dhrtH1ltUxDn4NalSYmPE2rCkVpcf/X6R0wDwcFpzhd4w==", - "engines": { - "node": ">=6" - } - }, "node_modules/regexp.prototype.flags": { "version": "1.5.0", "resolved": "https://registry.npmjs.org/regexp.prototype.flags/-/regexp.prototype.flags-1.5.0.tgz", @@ -3965,6 +3918,14 @@ "source-map": "^0.6.0" } }, + "node_modules/stream-read-all": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/stream-read-all/-/stream-read-all-3.0.1.tgz", + "integrity": "sha512-EWZT9XOceBPlVJRrYcykW8jyRSZYbkb/0ZK36uLEmoWVO5gxBOnntNTseNzfREsqxqdfEGQrD8SXQ3QWbBmq8A==", + "engines": { + "node": ">=10" + } + }, "node_modules/string-width": { "version": "4.2.3", "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", @@ -4082,33 +4043,39 @@ } }, "node_modules/table-layout": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/table-layout/-/table-layout-1.0.2.tgz", - "integrity": "sha512-qd/R7n5rQTRFi+Zf2sk5XVVd9UQl6ZkduPFC3S7WEGJAmetDTjY3qPN50eSKzwuzEyQKy5TN2TiZdkIjos2L6A==", + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/table-layout/-/table-layout-3.0.2.tgz", + "integrity": "sha512-rpyNZYRw+/C+dYkcQ3Pr+rLxW4CfHpXjPDnG7lYhdRoUcZTUt+KEsX+94RGp/aVp/MQU35JCITv2T/beY4m+hw==", "dependencies": { - "array-back": "^4.0.1", - "deep-extend": 
"~0.6.0", - "typical": "^5.2.0", - "wordwrapjs": "^4.0.0" + "@75lb/deep-merge": "^1.1.1", + "array-back": "^6.2.2", + "command-line-args": "^5.2.1", + "command-line-usage": "^7.0.0", + "stream-read-all": "^3.0.1", + "typical": "^7.1.1", + "wordwrapjs": "^5.1.0" + }, + "bin": { + "table-layout": "bin/cli.js" }, "engines": { - "node": ">=8.0.0" + "node": ">=12.17" } }, "node_modules/table-layout/node_modules/array-back": { - "version": "4.0.2", - "resolved": "https://registry.npmjs.org/array-back/-/array-back-4.0.2.tgz", - "integrity": "sha512-NbdMezxqf94cnNfWLL7V/im0Ub+Anbb0IoZhvzie8+4HJ4nMQuzHuy49FkGYCJK2yAloZ3meiB6AVMClbrI1vg==", + "version": "6.2.2", + "resolved": "https://registry.npmjs.org/array-back/-/array-back-6.2.2.tgz", + "integrity": "sha512-gUAZ7HPyb4SJczXAMUXMGAvI976JoK3qEx9v1FTmeYuJj0IBiaKttG1ydtGKdkfqWkIkouke7nG8ufGy77+Cvw==", "engines": { - "node": ">=8" + "node": ">=12.17" } }, "node_modules/table-layout/node_modules/typical": { - "version": "5.2.0", - "resolved": "https://registry.npmjs.org/typical/-/typical-5.2.0.tgz", - "integrity": "sha512-dvdQgNDNJo+8B2uBQoqdb11eUCE1JQXhvjC/CZtgvZseVd5TYMXnq0+vuUemXbd/Se29cTaUuPX3YIc2xgbvIg==", + "version": "7.1.1", + "resolved": "https://registry.npmjs.org/typical/-/typical-7.1.1.tgz", + "integrity": "sha512-T+tKVNs6Wu7IWiAce5BgMd7OZfNYUndHwc5MknN+UHOudi7sGZzuHdCadllRuqJ3fPtgFtIH9+lt9qRv6lmpfA==", "engines": { - "node": ">=8" + "node": ">=12.17" } }, "node_modules/temp": { @@ -4553,23 +4520,11 @@ "dev": true }, "node_modules/wordwrapjs": { - "version": "4.0.1", - "resolved": "https://registry.npmjs.org/wordwrapjs/-/wordwrapjs-4.0.1.tgz", - "integrity": "sha512-kKlNACbvHrkpIw6oPeYDSmdCTu2hdMHoyXLTcUKala++lx5Y+wjJ/e474Jqv5abnVmwxw08DiTuHmw69lJGksA==", - "dependencies": { - "reduce-flatten": "^2.0.0", - "typical": "^5.2.0" - }, + "version": "5.1.0", + "resolved": "https://registry.npmjs.org/wordwrapjs/-/wordwrapjs-5.1.0.tgz", + "integrity": "sha512-JNjcULU2e4KJwUNv6CHgI46UvDGitb6dGryHajXTDiLgg1/RiGoPSDw4kZfYnwGtEXf2ZMeIewDQgFGzkCB2Sg==", "engines": { - "node": ">=8.0.0" - } - }, - "node_modules/wordwrapjs/node_modules/typical": { - "version": "5.2.0", - "resolved": "https://registry.npmjs.org/typical/-/typical-5.2.0.tgz", - "integrity": "sha512-dvdQgNDNJo+8B2uBQoqdb11eUCE1JQXhvjC/CZtgvZseVd5TYMXnq0+vuUemXbd/Se29cTaUuPX3YIc2xgbvIg==", - "engines": { - "node": ">=8" + "node": ">=12.17" } }, "node_modules/workerpool": { @@ -4690,32 +4645,48 @@ } }, "dependencies": { + "@75lb/deep-merge": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@75lb/deep-merge/-/deep-merge-1.1.1.tgz", + "integrity": "sha512-xvgv6pkMGBA6GwdyJbNAnDmfAIR/DfWhrj9jgWh3TY7gRm3KO46x/GPjRg6wJ0nOepwqrNxFfojebh0Df4h4Tw==", + "requires": { + "lodash.assignwith": "^4.2.0", + "typical": "^7.1.1" + }, + "dependencies": { + "typical": { + "version": "7.1.1", + "resolved": "https://registry.npmjs.org/typical/-/typical-7.1.1.tgz", + "integrity": "sha512-T+tKVNs6Wu7IWiAce5BgMd7OZfNYUndHwc5MknN+UHOudi7sGZzuHdCadllRuqJ3fPtgFtIH9+lt9qRv6lmpfA==" + } + } + }, "@apache-arrow/ts": { - "version": "12.0.0", - "resolved": "https://registry.npmjs.org/@apache-arrow/ts/-/ts-12.0.0.tgz", - "integrity": "sha512-ArJ3Fw5W9RAeNWuyCU2CdjL/nEAZSVDG1p3jz/ZtLo/q3NTz2w7HUCOJeszejH/5alGX+QirYrJ5c6BW++/P7g==", + "version": "14.0.2", + "resolved": "https://registry.npmjs.org/@apache-arrow/ts/-/ts-14.0.2.tgz", + "integrity": "sha512-CtwAvLkK0CZv7xsYeCo91ml6PvlfzAmAJZkRYuz2GNBwfYufj5SVi0iuSMwIMkcU/szVwvLdzORSLa5PlF/2ug==", "requires": { "@types/command-line-args": "5.2.0", 
"@types/command-line-usage": "5.0.2", - "@types/node": "18.14.5", + "@types/node": "20.3.0", "@types/pad-left": "2.1.1", "command-line-args": "5.2.1", - "command-line-usage": "6.1.3", - "flatbuffers": "23.3.3", + "command-line-usage": "7.0.1", + "flatbuffers": "23.5.26", "json-bignum": "^0.0.3", "pad-left": "^2.1.0", - "tslib": "^2.5.0" + "tslib": "^2.5.3" }, "dependencies": { "@types/node": { - "version": "18.14.5", - "resolved": "https://registry.npmjs.org/@types/node/-/node-18.14.5.tgz", - "integrity": "sha512-CRT4tMK/DHYhw1fcCEBwME9CSaZNclxfzVMe7GsO6ULSwsttbj70wSiX6rZdIjGblu93sTJxLdhNIT85KKI7Qw==" + "version": "20.3.0", + "resolved": "https://registry.npmjs.org/@types/node/-/node-20.3.0.tgz", + "integrity": "sha512-cumHmIAf6On83X7yP+LrsEyUOf/YlociZelmpRYaGFydoaPdxdt80MAbu6vWerQT2COCp2nPvHdsbD7tHn/YlQ==" }, "tslib": { - "version": "2.5.0", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.0.tgz", - "integrity": "sha512-336iVw3rtn2BUK7ORdIAHTyxHGRIHVReokCR3XjbckJMK7ms8FysBfhLR8IXnAgy7T0PTPNBWKiH514FOW/WSg==" + "version": "2.6.2", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.6.2.tgz", + "integrity": "sha512-AEYxH93jGFPn/a2iVAwW87VuUIkR1FVUKB77NwMF7nBTDkDrrT/Hpt/IrCJ0QXhW27jTBDcf5ZY7w6RiqTMw2Q==" } } }, @@ -5268,7 +5239,6 @@ "version": "4.3.0", "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", - "dev": true, "requires": { "color-convert": "^2.0.1" } @@ -5284,31 +5254,31 @@ } }, "apache-arrow": { - "version": "12.0.0", - "resolved": "https://registry.npmjs.org/apache-arrow/-/apache-arrow-12.0.0.tgz", - "integrity": "sha512-uI+hnZZsGfNJiR/wG8j5yPQuDjmOHx4hZpkA743G4x3TlFrCpA3MMX7KUkIOIw0e/CwZ8NYuaMzaQsblA47qVA==", + "version": "14.0.2", + "resolved": "https://registry.npmjs.org/apache-arrow/-/apache-arrow-14.0.2.tgz", + "integrity": "sha512-EBO2xJN36/XoY81nhLcwCJgFwkboDZeyNQ+OPsG7bCoQjc2BT0aTyH/MR6SrL+LirSNz+cYqjGRlupMMlP1aEg==", "requires": { "@types/command-line-args": "5.2.0", "@types/command-line-usage": "5.0.2", - "@types/node": "18.14.5", + "@types/node": "20.3.0", "@types/pad-left": "2.1.1", "command-line-args": "5.2.1", - "command-line-usage": "6.1.3", - "flatbuffers": "23.3.3", + "command-line-usage": "7.0.1", + "flatbuffers": "23.5.26", "json-bignum": "^0.0.3", "pad-left": "^2.1.0", - "tslib": "^2.5.0" + "tslib": "^2.5.3" }, "dependencies": { "@types/node": { - "version": "18.14.5", - "resolved": "https://registry.npmjs.org/@types/node/-/node-18.14.5.tgz", - "integrity": "sha512-CRT4tMK/DHYhw1fcCEBwME9CSaZNclxfzVMe7GsO6ULSwsttbj70wSiX6rZdIjGblu93sTJxLdhNIT85KKI7Qw==" + "version": "20.3.0", + "resolved": "https://registry.npmjs.org/@types/node/-/node-20.3.0.tgz", + "integrity": "sha512-cumHmIAf6On83X7yP+LrsEyUOf/YlociZelmpRYaGFydoaPdxdt80MAbu6vWerQT2COCp2nPvHdsbD7tHn/YlQ==" }, "tslib": { - "version": "2.5.0", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.5.0.tgz", - "integrity": "sha512-336iVw3rtn2BUK7ORdIAHTyxHGRIHVReokCR3XjbckJMK7ms8FysBfhLR8IXnAgy7T0PTPNBWKiH514FOW/WSg==" + "version": "2.6.2", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.6.2.tgz", + "integrity": "sha512-AEYxH93jGFPn/a2iVAwW87VuUIkR1FVUKB77NwMF7nBTDkDrrT/Hpt/IrCJ0QXhW27jTBDcf5ZY7w6RiqTMw2Q==" } } }, @@ -5505,7 +5475,6 @@ "version": "4.1.2", "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz", "integrity": "sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==", - "dev": true, 
"requires": { "ansi-styles": "^4.1.0", "supports-color": "^7.1.0" @@ -5515,13 +5484,20 @@ "version": "7.2.0", "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==", - "dev": true, "requires": { "has-flag": "^4.0.0" } } } }, + "chalk-template": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/chalk-template/-/chalk-template-0.4.0.tgz", + "integrity": "sha512-/ghrgmhfY8RaSdeo43hNXxpoHAtxdbskUHjPpfqUWGttFgycUhYPGx3YZBCnUCvOa7Doivn1IZec3DEGFoMgLg==", + "requires": { + "chalk": "^4.1.2" + } + }, "check-error": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/check-error/-/check-error-1.0.2.tgz", @@ -5559,7 +5535,6 @@ "version": "2.0.1", "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", - "dev": true, "requires": { "color-name": "~1.1.4" } @@ -5567,8 +5542,7 @@ "color-name": { "version": "1.1.4", "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", - "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", - "dev": true + "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==" }, "combined-stream": { "version": "1.0.8", @@ -5590,74 +5564,25 @@ } }, "command-line-usage": { - "version": "6.1.3", - "resolved": "https://registry.npmjs.org/command-line-usage/-/command-line-usage-6.1.3.tgz", - "integrity": "sha512-sH5ZSPr+7UStsloltmDh7Ce5fb8XPlHyoPzTpyyMuYCtervL65+ubVZ6Q61cFtFl62UyJlc8/JwERRbAFPUqgw==", + "version": "7.0.1", + "resolved": "https://registry.npmjs.org/command-line-usage/-/command-line-usage-7.0.1.tgz", + "integrity": "sha512-NCyznE//MuTjwi3y84QVUGEOT+P5oto1e1Pk/jFPVdPPfsG03qpTIl3yw6etR+v73d0lXsoojRpvbru2sqePxQ==", "requires": { - "array-back": "^4.0.2", - "chalk": "^2.4.2", - "table-layout": "^1.0.2", - "typical": "^5.2.0" + "array-back": "^6.2.2", + "chalk-template": "^0.4.0", + "table-layout": "^3.0.0", + "typical": "^7.1.1" }, "dependencies": { - "ansi-styles": { - "version": "3.2.1", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-3.2.1.tgz", - "integrity": "sha512-VT0ZI6kZRdTh8YyJw3SMbYm/u+NqfsAxEpWO0Pf9sq8/e94WxxOpPKx9FR1FlyCtOVDNOQ+8ntlqFxiRc+r5qA==", - "requires": { - "color-convert": "^1.9.0" - } - }, "array-back": { - "version": "4.0.2", - "resolved": "https://registry.npmjs.org/array-back/-/array-back-4.0.2.tgz", - "integrity": "sha512-NbdMezxqf94cnNfWLL7V/im0Ub+Anbb0IoZhvzie8+4HJ4nMQuzHuy49FkGYCJK2yAloZ3meiB6AVMClbrI1vg==" - }, - "chalk": { - "version": "2.4.2", - "resolved": "https://registry.npmjs.org/chalk/-/chalk-2.4.2.tgz", - "integrity": "sha512-Mti+f9lpJNcwF4tWV8/OrTTtF1gZi+f8FqlyAdouralcFWFQWF2+NgCHShjkCb+IFBLq9buZwE1xckQU4peSuQ==", - "requires": { - "ansi-styles": "^3.2.1", - "escape-string-regexp": "^1.0.5", - "supports-color": "^5.3.0" - } - }, - "color-convert": { - "version": "1.9.3", - "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-1.9.3.tgz", - "integrity": "sha512-QfAUtd+vFdAtFQcC8CCyYt1fYWxSqAiK2cSD6zDB8N3cpsEBAvRxp9zOGg6G/SHHJYAT88/az/IuDGALsNVbGg==", - "requires": { - "color-name": "1.1.3" - } - }, - "color-name": { - "version": "1.1.3", - "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.3.tgz", - "integrity": 
"sha512-72fSenhMw2HZMTVHeCA9KCmpEIbzWiQsjN+BHcBbS9vr1mtt+vJjPdksIBNUmKAW8TFUDPJK5SUU3QhE9NEXDw==" - }, - "escape-string-regexp": { - "version": "1.0.5", - "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz", - "integrity": "sha512-vbRorB5FUQWvla16U8R/qgaFIya2qGzwDrNmCZuYKrbdSUMG6I1ZCGQRefkRVhuOkIGVne7BQ35DSfo1qvJqFg==" - }, - "has-flag": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-3.0.0.tgz", - "integrity": "sha512-sKJf1+ceQBr4SMkvQnBDNDtf4TXpVhVGateu0t918bl30FnbE2m4vNLX+VWe/dpjlb+HugGYzW7uQXH98HPEYw==" - }, - "supports-color": { - "version": "5.5.0", - "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-5.5.0.tgz", - "integrity": "sha512-QjVjwdXIt408MIiAqCX4oUKsgU2EqAGzs2Ppkm4aQYbjm+ZEWEcW4SfFNTr4uMNZma0ey4f5lgLrkB0aX0QMow==", - "requires": { - "has-flag": "^3.0.0" - } + "version": "6.2.2", + "resolved": "https://registry.npmjs.org/array-back/-/array-back-6.2.2.tgz", + "integrity": "sha512-gUAZ7HPyb4SJczXAMUXMGAvI976JoK3qEx9v1FTmeYuJj0IBiaKttG1ydtGKdkfqWkIkouke7nG8ufGy77+Cvw==" }, "typical": { - "version": "5.2.0", - "resolved": "https://registry.npmjs.org/typical/-/typical-5.2.0.tgz", - "integrity": "sha512-dvdQgNDNJo+8B2uBQoqdb11eUCE1JQXhvjC/CZtgvZseVd5TYMXnq0+vuUemXbd/Se29cTaUuPX3YIc2xgbvIg==" + "version": "7.1.1", + "resolved": "https://registry.npmjs.org/typical/-/typical-7.1.1.tgz", + "integrity": "sha512-T+tKVNs6Wu7IWiAce5BgMd7OZfNYUndHwc5MknN+UHOudi7sGZzuHdCadllRuqJ3fPtgFtIH9+lt9qRv6lmpfA==" } } }, @@ -5716,11 +5641,6 @@ "type-detect": "^4.0.0" } }, - "deep-extend": { - "version": "0.6.0", - "resolved": "https://registry.npmjs.org/deep-extend/-/deep-extend-0.6.0.tgz", - "integrity": "sha512-LOHxIOaPYdHlJRtCQfDIVZtfw/ufM8+rVj649RIHzcm/vGwQRXFt6OPqIFWsm2XEMrNIEtWR64sY1LEKD2vAOA==" - }, "deep-is": { "version": "0.1.4", "resolved": "https://registry.npmjs.org/deep-is/-/deep-is-0.1.4.tgz", @@ -6297,9 +6217,9 @@ } }, "flatbuffers": { - "version": "23.3.3", - "resolved": "https://registry.npmjs.org/flatbuffers/-/flatbuffers-23.3.3.tgz", - "integrity": "sha512-jmreOaAT1t55keaf+Z259Tvh8tR/Srry9K8dgCgvizhKSEr6gLGgaOJI2WFL5fkOpGOGRZwxUrlFn0GCmXUy6g==" + "version": "23.5.26", + "resolved": "https://registry.npmjs.org/flatbuffers/-/flatbuffers-23.5.26.tgz", + "integrity": "sha512-vE+SI9vrJDwi1oETtTIFldC/o9GsVKRM+s6EL0nQgxXlYV1Vc4Tk30hj4xGICftInKQKj1F3up2n8UbIVobISQ==" }, "flatted": { "version": "3.2.7", @@ -6502,8 +6422,7 @@ "has-flag": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", - "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==", - "dev": true + "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==" }, "has-property-descriptors": { "version": "1.0.0", @@ -6856,6 +6775,11 @@ "p-locate": "^5.0.0" } }, + "lodash.assignwith": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/lodash.assignwith/-/lodash.assignwith-4.2.0.tgz", + "integrity": "sha512-ZznplvbvtjK2gMvnQ1BR/zqPFZmS6jbK4p+6Up4xcRYA7yMIwxHCfbTcrYxXKzzqLsQ05eJPVznEW3tuwV7k1g==" + }, "lodash.camelcase": { "version": "4.3.0", "resolved": "https://registry.npmjs.org/lodash.camelcase/-/lodash.camelcase-4.3.0.tgz", @@ -7323,11 +7247,6 @@ "picomatch": "^2.2.1" } }, - "reduce-flatten": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/reduce-flatten/-/reduce-flatten-2.0.0.tgz", - "integrity": 
"sha512-EJ4UNY/U1t2P/2k6oqotuX2Cc3T6nxJwsM0N0asT7dhrtH1ltUxDn4NalSYmPE2rCkVpcf/X6R0wDwcFpzhd4w==" - }, "regexp.prototype.flags": { "version": "1.5.0", "resolved": "https://registry.npmjs.org/regexp.prototype.flags/-/regexp.prototype.flags-1.5.0.tgz", @@ -7523,6 +7442,11 @@ "source-map": "^0.6.0" } }, + "stream-read-all": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/stream-read-all/-/stream-read-all-3.0.1.tgz", + "integrity": "sha512-EWZT9XOceBPlVJRrYcykW8jyRSZYbkb/0ZK36uLEmoWVO5gxBOnntNTseNzfREsqxqdfEGQrD8SXQ3QWbBmq8A==" + }, "string-width": { "version": "4.2.3", "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", @@ -7604,25 +7528,28 @@ "dev": true }, "table-layout": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/table-layout/-/table-layout-1.0.2.tgz", - "integrity": "sha512-qd/R7n5rQTRFi+Zf2sk5XVVd9UQl6ZkduPFC3S7WEGJAmetDTjY3qPN50eSKzwuzEyQKy5TN2TiZdkIjos2L6A==", + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/table-layout/-/table-layout-3.0.2.tgz", + "integrity": "sha512-rpyNZYRw+/C+dYkcQ3Pr+rLxW4CfHpXjPDnG7lYhdRoUcZTUt+KEsX+94RGp/aVp/MQU35JCITv2T/beY4m+hw==", "requires": { - "array-back": "^4.0.1", - "deep-extend": "~0.6.0", - "typical": "^5.2.0", - "wordwrapjs": "^4.0.0" + "@75lb/deep-merge": "^1.1.1", + "array-back": "^6.2.2", + "command-line-args": "^5.2.1", + "command-line-usage": "^7.0.0", + "stream-read-all": "^3.0.1", + "typical": "^7.1.1", + "wordwrapjs": "^5.1.0" }, "dependencies": { "array-back": { - "version": "4.0.2", - "resolved": "https://registry.npmjs.org/array-back/-/array-back-4.0.2.tgz", - "integrity": "sha512-NbdMezxqf94cnNfWLL7V/im0Ub+Anbb0IoZhvzie8+4HJ4nMQuzHuy49FkGYCJK2yAloZ3meiB6AVMClbrI1vg==" + "version": "6.2.2", + "resolved": "https://registry.npmjs.org/array-back/-/array-back-6.2.2.tgz", + "integrity": "sha512-gUAZ7HPyb4SJczXAMUXMGAvI976JoK3qEx9v1FTmeYuJj0IBiaKttG1ydtGKdkfqWkIkouke7nG8ufGy77+Cvw==" }, "typical": { - "version": "5.2.0", - "resolved": "https://registry.npmjs.org/typical/-/typical-5.2.0.tgz", - "integrity": "sha512-dvdQgNDNJo+8B2uBQoqdb11eUCE1JQXhvjC/CZtgvZseVd5TYMXnq0+vuUemXbd/Se29cTaUuPX3YIc2xgbvIg==" + "version": "7.1.1", + "resolved": "https://registry.npmjs.org/typical/-/typical-7.1.1.tgz", + "integrity": "sha512-T+tKVNs6Wu7IWiAce5BgMd7OZfNYUndHwc5MknN+UHOudi7sGZzuHdCadllRuqJ3fPtgFtIH9+lt9qRv6lmpfA==" } } }, @@ -7940,20 +7867,9 @@ "dev": true }, "wordwrapjs": { - "version": "4.0.1", - "resolved": "https://registry.npmjs.org/wordwrapjs/-/wordwrapjs-4.0.1.tgz", - "integrity": "sha512-kKlNACbvHrkpIw6oPeYDSmdCTu2hdMHoyXLTcUKala++lx5Y+wjJ/e474Jqv5abnVmwxw08DiTuHmw69lJGksA==", - "requires": { - "reduce-flatten": "^2.0.0", - "typical": "^5.2.0" - }, - "dependencies": { - "typical": { - "version": "5.2.0", - "resolved": "https://registry.npmjs.org/typical/-/typical-5.2.0.tgz", - "integrity": "sha512-dvdQgNDNJo+8B2uBQoqdb11eUCE1JQXhvjC/CZtgvZseVd5TYMXnq0+vuUemXbd/Se29cTaUuPX3YIc2xgbvIg==" - } - } + "version": "5.1.0", + "resolved": "https://registry.npmjs.org/wordwrapjs/-/wordwrapjs-5.1.0.tgz", + "integrity": "sha512-JNjcULU2e4KJwUNv6CHgI46UvDGitb6dGryHajXTDiLgg1/RiGoPSDw4kZfYnwGtEXf2ZMeIewDQgFGzkCB2Sg==" }, "workerpool": { "version": "6.2.1", diff --git a/node/package.json b/node/package.json index fb15797d..0b59a2aa 100644 --- a/node/package.json +++ b/node/package.json @@ -57,9 +57,9 @@ "uuid": "^9.0.0" }, "dependencies": { - "@apache-arrow/ts": "^12.0.0", + "@apache-arrow/ts": "^14.0.2", "@neon-rs/load": "^0.0.74", - "apache-arrow": "^12.0.0", + "apache-arrow": 
"^14.0.2", "axios": "^1.4.0" }, "os": [ diff --git a/node/src/arrow.ts b/node/src/arrow.ts index 90e59d31..2010c220 100644 --- a/node/src/arrow.ts +++ b/node/src/arrow.ts @@ -17,10 +17,9 @@ import { Float32, makeBuilder, RecordBatchFileWriter, - Utf8, - type Vector, + Utf8, type Vector, FixedSizeList, - vectorFromArray, type Schema, Table as ArrowTable, RecordBatchStreamWriter, List, Float64 + vectorFromArray, type Schema, Table as ArrowTable, RecordBatchStreamWriter, List, Float64, RecordBatch, makeData, Struct } from 'apache-arrow' import { type EmbeddingFunction } from './index' @@ -78,6 +77,7 @@ export async function convertToTable (data: Array>, e } records[columnsKey] = listBuilder.finish().toVector() } else { + // TODO if this is a struct field then recursively align the subfields records[columnsKey] = vectorFromArray(values) } } @@ -110,21 +110,27 @@ function newVectorType (dim: number): FixedSizeList { } // Converts an Array of records into Arrow IPC format -export async function fromRecordsToBuffer (data: Array>, embeddings?: EmbeddingFunction): Promise { - const table = await convertToTable(data, embeddings) +export async function fromRecordsToBuffer (data: Array>, embeddings?: EmbeddingFunction, schema?: Schema): Promise { + let table = await convertToTable(data, embeddings) + if (schema !== undefined) { + table = alignTable(table, schema) + } const writer = RecordBatchFileWriter.writeAll(table) return Buffer.from(await writer.toUint8Array()) } // Converts an Array of records into Arrow IPC stream format -export async function fromRecordsToStreamBuffer (data: Array>, embeddings?: EmbeddingFunction): Promise { - const table = await convertToTable(data, embeddings) +export async function fromRecordsToStreamBuffer (data: Array>, embeddings?: EmbeddingFunction, schema?: Schema): Promise { + let table = await convertToTable(data, embeddings) + if (schema !== undefined) { + table = alignTable(table, schema) + } const writer = RecordBatchStreamWriter.writeAll(table) return Buffer.from(await writer.toUint8Array()) } // Converts an Arrow Table into Arrow IPC format -export async function fromTableToBuffer (table: ArrowTable, embeddings?: EmbeddingFunction): Promise { +export async function fromTableToBuffer (table: ArrowTable, embeddings?: EmbeddingFunction, schema?: Schema): Promise { if (embeddings !== undefined) { const source = table.getChild(embeddings.sourceColumn) @@ -136,12 +142,15 @@ export async function fromTableToBuffer (table: ArrowTable, embeddings?: Embe const column = vectorFromArray(vectors, newVectorType(vectors[0].length)) table = table.assign(new ArrowTable({ vector: column })) } + if (schema !== undefined) { + table = alignTable(table, schema) + } const writer = RecordBatchFileWriter.writeAll(table) return Buffer.from(await writer.toUint8Array()) } // Converts an Arrow Table into Arrow IPC stream format -export async function fromTableToStreamBuffer (table: ArrowTable, embeddings?: EmbeddingFunction): Promise { +export async function fromTableToStreamBuffer (table: ArrowTable, embeddings?: EmbeddingFunction, schema?: Schema): Promise { if (embeddings !== undefined) { const source = table.getChild(embeddings.sourceColumn) @@ -153,10 +162,36 @@ export async function fromTableToStreamBuffer (table: ArrowTable, embeddings? 
const column = vectorFromArray(vectors, newVectorType(vectors[0].length)) table = table.assign(new ArrowTable({ vector: column })) } + if (schema !== undefined) { + table = alignTable(table, schema) + } const writer = RecordBatchStreamWriter.writeAll(table) return Buffer.from(await writer.toUint8Array()) } +function alignBatch (batch: RecordBatch, schema: Schema): RecordBatch { + const alignedChildren = [] + for (const field of schema.fields) { + const indexInBatch = batch.schema.fields?.findIndex((f) => f.name === field.name) + if (indexInBatch < 0) { + throw new Error(`The column ${field.name} was not found in the Arrow Table`) + } + alignedChildren.push(batch.data.children[indexInBatch]) + } + const newData = makeData({ + type: new Struct(schema.fields), + length: batch.numRows, + nullCount: batch.nullCount, + children: alignedChildren + }) + return new RecordBatch(schema, newData) +} + +function alignTable (table: ArrowTable, schema: Schema): ArrowTable { + const alignedBatches = table.batches.map(batch => alignBatch(batch, schema)) + return new ArrowTable(schema, alignedBatches) +} + // Creates an empty Arrow Table export function createEmptyTable (schema: Schema): ArrowTable { return new ArrowTable(schema) diff --git a/node/src/index.ts b/node/src/index.ts index a5de03eb..e11659e5 100644 --- a/node/src/index.ts +++ b/node/src/index.ts @@ -485,10 +485,10 @@ export class LocalConnection implements Connection { } buffer = await fromTableToBuffer(createEmptyTable(schema)) } else if (data instanceof ArrowTable) { - buffer = await fromTableToBuffer(data, embeddingFunction) + buffer = await fromTableToBuffer(data, embeddingFunction, schema) } else { // data is Array> - buffer = await fromRecordsToBuffer(data, embeddingFunction) + buffer = await fromRecordsToBuffer(data, embeddingFunction, schema) } const tbl = await tableCreate.call(this._db, name, buffer, writeOptions?.writeMode?.toString(), ...getAwsArgs(this._options())) @@ -560,9 +560,10 @@ export class LocalTable implements Table { * @return The number of rows added to the table */ async add (data: Array>): Promise { + const schema = await this.schema return tableAdd.call( this._tbl, - await fromRecordsToBuffer(data, this._embeddings), + await fromRecordsToBuffer(data, this._embeddings, schema), WriteMode.Append.toString(), ...getAwsArgs(this._options()) ).then((newTable: any) => { this._tbl = newTable }) diff --git a/node/src/test/test.ts b/node/src/test/test.ts index 68a0f331..d1b44eb9 100644 --- a/node/src/test/test.ts +++ b/node/src/test/test.ts @@ -176,6 +176,26 @@ describe('LanceDB client', function () { assert.deepEqual(await con.tableNames(), ['vectors']) }) + it('create a table with a schema and records', async function () { + const dir = await track().mkdir('lancejs') + const con = await lancedb.connect(dir) + + const schema = new Schema( + [new Field('id', new Int32()), + new Field('name', new Utf8()), + new Field('vector', new FixedSizeList(2, new Field('item', new Float32(), true)), false) + ] + ) + const data = [ + { vector: [0.5, 0.2], name: 'foo', id: 0 }, + { vector: [0.3, 0.1], name: 'bar', id: 1 } + ] + // even thought the keys in data is out of order it should still work + const table = await con.createTable({ name: 'vectors', data, schema }) + assert.equal(table.name, 'vectors') + assert.deepEqual(await con.tableNames(), ['vectors']) + }) + it('create a table with a empty data array', async function () { const dir = await track().mkdir('lancejs') const con = await lancedb.connect(dir) @@ -294,6 +314,25 @@ 
     assert.equal(await table.countRows(), 4)
   })
 
+  it('appends records with fields in a different order', async function () {
+    const dir = await track().mkdir('lancejs')
+    const con = await lancedb.connect(dir)
+
+    const data = [
+      { id: 1, vector: [0.1, 0.2], price: 10, name: 'a' },
+      { id: 2, vector: [1.1, 1.2], price: 50, name: 'b' }
+    ]
+
+    const table = await con.createTable('vectors', data)
+
+    const dataAdd = [
+      { id: 3, vector: [2.1, 2.2], name: 'c', price: 10 },
+      { id: 4, vector: [3.1, 3.2], name: 'd', price: 50 }
+    ]
+    await table.add(dataAdd)
+    assert.equal(await table.countRows(), 4)
+  })
+
   it('overwrite all records in a table', async function () {
     const uri = await createTestDB()
     const con = await lancedb.connect(uri)
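The patch above makes record/column order independent of the declared schema when creating tables and appending from the Node API. A minimal usage sketch, modelled on the new tests (assuming the published `vectordb` package and the Arrow type constructors from `apache-arrow`; the directory path and record values are illustrative, not part of the patch):

```ts
import * as lancedb from 'vectordb'
import { Schema, Field, Int32, Utf8, Float32, FixedSizeList } from 'apache-arrow'

async function main (): Promise<void> {
  const con = await lancedb.connect('/tmp/lancedb-schema-demo')

  // Declared column order: id, name, vector.
  const schema = new Schema([
    new Field('id', new Int32()),
    new Field('name', new Utf8()),
    new Field('vector', new FixedSizeList(2, new Field('item', new Float32(), true)), false)
  ])

  // Record keys are deliberately in a different order; the alignTable step
  // added in arrow.ts reorders the columns to match the schema before writing.
  const data = [
    { vector: [0.5, 0.2], name: 'foo', id: 0 },
    { vector: [0.3, 0.1], name: 'bar', id: 1 }
  ]
  const table = await con.createTable({ name: 'vectors', data, schema })

  // Appends are aligned against the stored table schema as well.
  await table.add([{ name: 'baz', id: 2, vector: [0.9, 0.4] }])
}

main().catch(console.error)
```

The alignment happens client-side in `alignTable`, batch by batch, so the Arrow IPC buffer handed to the native layer always matches the declared schema.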
From 45b006d68c592f6f4a909bc98123a74d7d224837 Mon Sep 17 00:00:00 2001
From: Lei Xu
Date: Thu, 11 Jan 2024 10:58:49 -0800
Subject: [PATCH 38/43] chore: remove black as dependency (#808)

We use `ruff` in CI and dev workflow now.
---
 .github/workflows/python.yml | 4 ++--
 python/README.md             | 6 +++---
 python/pyproject.toml        | 5 +----
 3 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
index d490520a..835cd41e 100644
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -49,7 +49,7 @@ jobs:
     timeout-minutes: 30
     strategy:
       matrix:
-        config: 
+        config:
          - name: x86 Mac
            runner: macos-13
          - name: Arm Mac
@@ -74,7 +74,7 @@
        run: |
          pip install -e .[tests]
          pip install tantivy@git+https://github.com/quickwit-oss/tantivy-py#164adc87e1a033117001cf70e38c82a53014d985
-         pip install pytest pytest-mock black
+         pip install pytest pytest-mock
      - name: Run tests
        run: pytest -m "not slow" -x -v --durations=30 tests
  pydantic1x:
diff --git a/python/README.md b/python/README.md
index faad44d4..290447cd 100644
--- a/python/README.md
+++ b/python/README.md
@@ -45,8 +45,8 @@ pytest
 To run linter and automatically fix all errors:
 
 ```bash
-black .
-isort .
+ruff format python
+ruff --fix python
 ```
 
 If any packages are missing, install them with:
@@ -82,4 +82,4 @@ pip install tantivy
 To run the unit tests:
 ```bash
 pytest
-```
\ No newline at end of file
+```
diff --git a/python/pyproject.toml b/python/pyproject.toml
index f1f9cc26..7203dc60 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -46,7 +46,7 @@ repository = "https://github.com/lancedb/lancedb"
 
 [project.optional-dependencies]
 tests = ["aiohttp", "pandas>=1.4", "pytest", "pytest-mock", "pytest-asyncio", "duckdb", "pytz"]
-dev = ["ruff", "pre-commit", "black"]
+dev = ["ruff", "pre-commit"]
 docs = ["mkdocs", "mkdocs-jupyter", "mkdocs-material", "mkdocstrings[python]"]
 clip = ["torch", "pillow", "open-clip"]
 embeddings = ["openai>=1.6.1", "sentence-transformers", "torch", "pillow", "open-clip-torch", "cohere", "InstructorEmbedding"]
@@ -55,9 +55,6 @@ embeddings = ["openai>=1.6.1", "sentence-transformers", "torch", "pillow", "open
 requires = ["setuptools", "wheel"]
 build-backend = "setuptools.build_meta"
 
-[tool.isort]
-profile = "black"
-
 [tool.ruff]
 select = ["F", "E", "W", "I", "G", "TCH", "PERF"]

From 63e273606e842cdc841e1ea9c84e33edfb487453 Mon Sep 17 00:00:00 2001
From: Will Jones
Date: Thu, 11 Jan 2024 13:28:10 -0800
Subject: [PATCH 39/43] upgrade lance (#809)

---
 Cargo.toml            | 8 ++++----
 python/pyproject.toml | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 8cc810ce..4e8482d2 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -5,10 +5,10 @@ exclude = ["python"]
 resolver = "2"
 
 [workspace.dependencies]
-lance = { "version" = "=0.9.5", "features" = ["dynamodb"] }
-lance-index = { "version" = "=0.9.5" }
-lance-linalg = { "version" = "=0.9.5" }
-lance-testing = { "version" = "=0.9.5" }
+lance = { "version" = "=0.9.6", "features" = ["dynamodb"] }
+lance-index = { "version" = "=0.9.6" }
+lance-linalg = { "version" = "=0.9.6" }
+lance-testing = { "version" = "=0.9.6" }
 # Note that this one does not include pyarrow
 arrow = { version = "49.0.0", optional = false }
 arrow-array = "49.0"
diff --git a/python/pyproject.toml b/python/pyproject.toml
index 7203dc60..c1cef035 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -3,7 +3,7 @@ name = "lancedb"
 version = "0.4.3"
 dependencies = [
     "deprecation",
-    "pylance==0.9.5",
+    "pylance==0.9.6",
     "ratelimiter~=1.0",
     "retry>=0.9.2",
     "tqdm>=4.27.0",

From 1387dc6e48f00a11b2fec7fbffe7e6b16f2ff532 Mon Sep 17 00:00:00 2001
From: Lance Release
Date: Thu, 11 Jan 2024 21:29:00 +0000
Subject: [PATCH 40/43] =?UTF-8?q?[python]=20Bump=20version:=200.4.3=20?=
 =?UTF-8?q?=E2=86=92=200.4.4?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 python/.bumpversion.cfg | 2 +-
 python/pyproject.toml   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/.bumpversion.cfg b/python/.bumpversion.cfg
index c69f92cd..00b8d6ee 100644
--- a/python/.bumpversion.cfg
+++ b/python/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.4.3
+current_version = 0.4.4
 commit = True
 message = [python] Bump version: {current_version} → {new_version}
 tag = True
diff --git a/python/pyproject.toml b/python/pyproject.toml
index c1cef035..e993d9d7 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "lancedb"
-version = "0.4.3"
+version = "0.4.4"
 dependencies = [
     "deprecation",
     "pylance==0.9.6",

From 55cc3ed5a279eb28a1b36ef0131cd664f247295f Mon Sep 17 00:00:00 2001
From: Lance Release
Date: Thu, 11 Jan 2024 21:33:55 +0000
Subject: [PATCH 41/43] =?UTF-8?q?Bump=20version:=200.4.2=20=E2=86=92=200.4?=
 =?UTF-8?q?.3?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .bumpversion.cfg         | 2 +-
 node/package.json        | 12 ++++++------
 rust/ffi/node/Cargo.toml | 2 +-
 rust/vectordb/Cargo.toml | 2 +-
 4 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index a4d2a718..5d9588df 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.4.2
+current_version = 0.4.3
 commit = True
 message = Bump version: {current_version} → {new_version}
 tag = True
diff --git a/node/package.json b/node/package.json
index 0b59a2aa..9a134973 100644
--- a/node/package.json
+++ b/node/package.json
@@ -1,6 +1,6 @@
 {
   "name": "vectordb",
-  "version": "0.4.2",
+  "version": "0.4.3",
   "description": " Serverless, low-latency vector database for AI applications",
   "main": "dist/index.js",
   "types": "dist/index.d.ts",
@@ -81,10 +81,10 @@
     }
   },
   "optionalDependencies": {
-    "@lancedb/vectordb-darwin-arm64": "0.4.2",
-    "@lancedb/vectordb-darwin-x64": "0.4.2",
-    "@lancedb/vectordb-linux-arm64-gnu": "0.4.2",
-    "@lancedb/vectordb-linux-x64-gnu": "0.4.2",
-    "@lancedb/vectordb-win32-x64-msvc": "0.4.2"
+    "@lancedb/vectordb-darwin-arm64": "0.4.3",
+    "@lancedb/vectordb-darwin-x64": "0.4.3",
+    "@lancedb/vectordb-linux-arm64-gnu": "0.4.3",
+    "@lancedb/vectordb-linux-x64-gnu": "0.4.3",
+    "@lancedb/vectordb-win32-x64-msvc": "0.4.3"
   }
 }
diff --git a/rust/ffi/node/Cargo.toml b/rust/ffi/node/Cargo.toml
index ce961a2d..0302063d 100644
--- a/rust/ffi/node/Cargo.toml
+++ b/rust/ffi/node/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "vectordb-node"
-version = "0.4.2"
+version = "0.4.3"
 description = "Serverless, low-latency vector database for AI applications"
 license = "Apache-2.0"
 edition = "2018"
diff --git a/rust/vectordb/Cargo.toml b/rust/vectordb/Cargo.toml
index f1bfe216..63c73221 100644
--- a/rust/vectordb/Cargo.toml
+++ b/rust/vectordb/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "vectordb"
-version = "0.4.2"
+version = "0.4.3"
 edition = "2021"
 description = "LanceDB: A serverless, low-latency vector database for AI applications"
 license = "Apache-2.0"

From 162f8536d176851b799ba4184a5e6d67cb46289a Mon Sep 17 00:00:00 2001
From: Lance Release
Date: Thu, 11 Jan 2024 21:34:04 +0000
Subject: [PATCH 42/43] Updating package-lock.json

---
 node/package-lock.json | 104 +++--------------------------------------
 1 file changed, 7 insertions(+), 97 deletions(-)

diff --git a/node/package-lock.json b/node/package-lock.json
index 1373fb99..bb89849c 100644
--- a/node/package-lock.json
+++ b/node/package-lock.json
@@ -1,12 +1,12 @@
 {
   "name": "vectordb",
-  "version": "0.4.2",
+  "version": "0.4.3",
   "lockfileVersion": 2,
   "requires": true,
   "packages": {
     "": {
       "name": "vectordb",
-      "version": "0.4.2",
+      "version": "0.4.3",
       "cpu": [
         "x64",
         "arm64"
       ],
@@ -53,11 +53,11 @@
         "uuid": "^9.0.0"
       },
       "optionalDependencies": {
-        "@lancedb/vectordb-darwin-arm64": "0.4.2",
-        "@lancedb/vectordb-darwin-x64": "0.4.2",
-        "@lancedb/vectordb-linux-arm64-gnu": "0.4.2",
-        "@lancedb/vectordb-linux-x64-gnu": "0.4.2",
-        "@lancedb/vectordb-win32-x64-msvc": "0.4.2"
+        "@lancedb/vectordb-darwin-arm64": "0.4.3",
+        "@lancedb/vectordb-darwin-x64": "0.4.3",
+        "@lancedb/vectordb-linux-arm64-gnu": "0.4.3",
+        "@lancedb/vectordb-linux-x64-gnu": "0.4.3",
+        "@lancedb/vectordb-win32-x64-msvc": "0.4.3"
       }
     },
     "node_modules/@75lb/deep-merge": {
@@ -336,66 +336,6 @@
         "@jridgewell/sourcemap-codec": "^1.4.10"
"^1.4.10" } }, - "node_modules/@lancedb/vectordb-darwin-arm64": { - "version": "0.4.2", - "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.4.2.tgz", - "integrity": "sha512-Ec73W2IHnZK4VC8g/7JyLbgcwcpNb9YI20yEhfTjEEFjJKoElZhDD/ZgghC3QQSRnrXFTxDzPK1V9BDT5QB2Hg==", - "cpu": [ - "arm64" - ], - "optional": true, - "os": [ - "darwin" - ] - }, - "node_modules/@lancedb/vectordb-darwin-x64": { - "version": "0.4.2", - "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.4.2.tgz", - "integrity": "sha512-tj0JJlOfOdeSAfmM7EZhrhFdCFjoq9Bmrjt4741BNjtF+Nv4Otl53lFtUQrexTr4oh/E1yY1qaydJ7K++8u3UA==", - "cpu": [ - "x64" - ], - "optional": true, - "os": [ - "darwin" - ] - }, - "node_modules/@lancedb/vectordb-linux-arm64-gnu": { - "version": "0.4.2", - "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.4.2.tgz", - "integrity": "sha512-OQ7ra5Q5RrLLwxIyI338KfQ2sSl8NJfqAHWvwiMtjCYFFYxIJGjX7U0I2MjSEPqJ5/ZoyjV4mjsvs0G1q20u+Q==", - "cpu": [ - "arm64" - ], - "optional": true, - "os": [ - "linux" - ] - }, - "node_modules/@lancedb/vectordb-linux-x64-gnu": { - "version": "0.4.2", - "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.4.2.tgz", - "integrity": "sha512-9tgIFSOYqNJzonnYsQr7v2gGdJm8aZ62UsVX2SWAIVhypoP4A05tAlbzjBgKO3R5xy5gpcW8tt/Pt8IsYWON7Q==", - "cpu": [ - "x64" - ], - "optional": true, - "os": [ - "linux" - ] - }, - "node_modules/@lancedb/vectordb-win32-x64-msvc": { - "version": "0.4.2", - "resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.4.2.tgz", - "integrity": "sha512-jhG3MqZ3r8BexXANLRNX57RAnCZT9psdSBORG3KTu5qe2xaunRlJNSA2kk8a79tf+gtUT/BAmMiXMzAi/dwq8w==", - "cpu": [ - "x64" - ], - "optional": true, - "os": [ - "win32" - ] - }, "node_modules/@neon-rs/cli": { "version": "0.0.160", "resolved": "https://registry.npmjs.org/@neon-rs/cli/-/cli-0.0.160.tgz", @@ -4839,36 +4779,6 @@ "@jridgewell/sourcemap-codec": "^1.4.10" } }, - "@lancedb/vectordb-darwin-arm64": { - "version": "0.4.2", - "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.4.2.tgz", - "integrity": "sha512-Ec73W2IHnZK4VC8g/7JyLbgcwcpNb9YI20yEhfTjEEFjJKoElZhDD/ZgghC3QQSRnrXFTxDzPK1V9BDT5QB2Hg==", - "optional": true - }, - "@lancedb/vectordb-darwin-x64": { - "version": "0.4.2", - "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.4.2.tgz", - "integrity": "sha512-tj0JJlOfOdeSAfmM7EZhrhFdCFjoq9Bmrjt4741BNjtF+Nv4Otl53lFtUQrexTr4oh/E1yY1qaydJ7K++8u3UA==", - "optional": true - }, - "@lancedb/vectordb-linux-arm64-gnu": { - "version": "0.4.2", - "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.4.2.tgz", - "integrity": "sha512-OQ7ra5Q5RrLLwxIyI338KfQ2sSl8NJfqAHWvwiMtjCYFFYxIJGjX7U0I2MjSEPqJ5/ZoyjV4mjsvs0G1q20u+Q==", - "optional": true - }, - "@lancedb/vectordb-linux-x64-gnu": { - "version": "0.4.2", - "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.4.2.tgz", - "integrity": "sha512-9tgIFSOYqNJzonnYsQr7v2gGdJm8aZ62UsVX2SWAIVhypoP4A05tAlbzjBgKO3R5xy5gpcW8tt/Pt8IsYWON7Q==", - "optional": true - }, - "@lancedb/vectordb-win32-x64-msvc": { - "version": "0.4.2", - "resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.4.2.tgz", - "integrity": 
"sha512-jhG3MqZ3r8BexXANLRNX57RAnCZT9psdSBORG3KTu5qe2xaunRlJNSA2kk8a79tf+gtUT/BAmMiXMzAi/dwq8w==", - "optional": true - }, "@neon-rs/cli": { "version": "0.0.160", "resolved": "https://registry.npmjs.org/@neon-rs/cli/-/cli-0.0.160.tgz", From f0a654036e962595f847ffbf0e9734b27c42fdbd Mon Sep 17 00:00:00 2001 From: Lance Release Date: Thu, 11 Jan 2024 22:21:42 +0000 Subject: [PATCH 43/43] Updating package-lock.json --- node/package-lock.json | 90 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) diff --git a/node/package-lock.json b/node/package-lock.json index bb89849c..2e934c73 100644 --- a/node/package-lock.json +++ b/node/package-lock.json @@ -336,6 +336,66 @@ "@jridgewell/sourcemap-codec": "^1.4.10" } }, + "node_modules/@lancedb/vectordb-darwin-arm64": { + "version": "0.4.3", + "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.4.3.tgz", + "integrity": "sha512-47CvvSaV1EdUsFEpXUJApTk+hMzAhCxVizipCFUlXCgcmzpCDL86wNgJij/X9a+j6zADhIX//Lsu0qd/an/Bpw==", + "cpu": [ + "arm64" + ], + "optional": true, + "os": [ + "darwin" + ] + }, + "node_modules/@lancedb/vectordb-darwin-x64": { + "version": "0.4.3", + "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.4.3.tgz", + "integrity": "sha512-UlZZv8CmJIuRJNJG+Y1VmFsGyPR8W/72Q5EwgMMsSES6zpMQ9pNdBDWhL3UGX6nMRgnbprkwYiWJ3xHhJvtqtw==", + "cpu": [ + "x64" + ], + "optional": true, + "os": [ + "darwin" + ] + }, + "node_modules/@lancedb/vectordb-linux-arm64-gnu": { + "version": "0.4.3", + "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.4.3.tgz", + "integrity": "sha512-L6NVJr/lKEd8+904FzZNpT8BGQMs2cHNYbGJMIaVvGnMiIJgKAFKtOyGtdDjoe1xRZoEw21yjRGksGbnRO5wHQ==", + "cpu": [ + "arm64" + ], + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@lancedb/vectordb-linux-x64-gnu": { + "version": "0.4.3", + "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.4.3.tgz", + "integrity": "sha512-OBx3WF3pK0xNfFJeErmuD9R2QWLa3XdeZspyTsIrQmBDeKj3HKh8y7Scpx4NH5Y09+9JNqRRKRZN7OqWTYhITg==", + "cpu": [ + "x64" + ], + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@lancedb/vectordb-win32-x64-msvc": { + "version": "0.4.3", + "resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.4.3.tgz", + "integrity": "sha512-n9IvR81NXZKnSN91mrgeXbEyCiGM+YLJpOgbdHoEtMP04VDnS+iSU4jGOtQBKErvWeCJQaGFQ9qzdcVchpRGyw==", + "cpu": [ + "x64" + ], + "optional": true, + "os": [ + "win32" + ] + }, "node_modules/@neon-rs/cli": { "version": "0.0.160", "resolved": "https://registry.npmjs.org/@neon-rs/cli/-/cli-0.0.160.tgz", @@ -4779,6 +4839,36 @@ "@jridgewell/sourcemap-codec": "^1.4.10" } }, + "@lancedb/vectordb-darwin-arm64": { + "version": "0.4.3", + "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.4.3.tgz", + "integrity": "sha512-47CvvSaV1EdUsFEpXUJApTk+hMzAhCxVizipCFUlXCgcmzpCDL86wNgJij/X9a+j6zADhIX//Lsu0qd/an/Bpw==", + "optional": true + }, + "@lancedb/vectordb-darwin-x64": { + "version": "0.4.3", + "resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.4.3.tgz", + "integrity": "sha512-UlZZv8CmJIuRJNJG+Y1VmFsGyPR8W/72Q5EwgMMsSES6zpMQ9pNdBDWhL3UGX6nMRgnbprkwYiWJ3xHhJvtqtw==", + "optional": true + }, + "@lancedb/vectordb-linux-arm64-gnu": { + "version": "0.4.3", + "resolved": 
"https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.4.3.tgz", + "integrity": "sha512-L6NVJr/lKEd8+904FzZNpT8BGQMs2cHNYbGJMIaVvGnMiIJgKAFKtOyGtdDjoe1xRZoEw21yjRGksGbnRO5wHQ==", + "optional": true + }, + "@lancedb/vectordb-linux-x64-gnu": { + "version": "0.4.3", + "resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.4.3.tgz", + "integrity": "sha512-OBx3WF3pK0xNfFJeErmuD9R2QWLa3XdeZspyTsIrQmBDeKj3HKh8y7Scpx4NH5Y09+9JNqRRKRZN7OqWTYhITg==", + "optional": true + }, + "@lancedb/vectordb-win32-x64-msvc": { + "version": "0.4.3", + "resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.4.3.tgz", + "integrity": "sha512-n9IvR81NXZKnSN91mrgeXbEyCiGM+YLJpOgbdHoEtMP04VDnS+iSU4jGOtQBKErvWeCJQaGFQ9qzdcVchpRGyw==", + "optional": true + }, "@neon-rs/cli": { "version": "0.0.160", "resolved": "https://registry.npmjs.org/@neon-rs/cli/-/cli-0.0.160.tgz",