mirror of
https://github.com/lancedb/lancedb.git
synced 2026-01-06 11:52:57 +00:00
Compare commits
102 Commits
v0.15.1-be
...
python-v0.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c0097c5f0a | ||
|
|
c199708e64 | ||
|
|
4a47150ae7 | ||
|
|
f86b20a564 | ||
|
|
cc81f3e1a5 | ||
|
|
bc49c4db82 | ||
|
|
d2eec46f17 | ||
|
|
51437bc228 | ||
|
|
fa53cfcfd2 | ||
|
|
374fe0ad95 | ||
|
|
35e5b84ba9 | ||
|
|
7c12d497b0 | ||
|
|
dfe4ba8dad | ||
|
|
fa1b9ad5bd | ||
|
|
8877eb020d | ||
|
|
01e4291d21 | ||
|
|
ab3ea76ad1 | ||
|
|
728ef8657d | ||
|
|
0b13901a16 | ||
|
|
84b110e0ef | ||
|
|
e1836e54e3 | ||
|
|
4ba5326880 | ||
|
|
b036a69300 | ||
|
|
5b12a47119 | ||
|
|
769d483e50 | ||
|
|
9ecb11fe5a | ||
|
|
22bd8329f3 | ||
|
|
a736fad149 | ||
|
|
072adc41aa | ||
|
|
c6f25ef1f0 | ||
|
|
2f0c5baea2 | ||
|
|
a63dd66d41 | ||
|
|
d6b3ccb37b | ||
|
|
c4f99e82e5 | ||
|
|
979a2d3d9d | ||
|
|
7ac5f74c80 | ||
|
|
ecdee4d2b1 | ||
|
|
f391ed828a | ||
|
|
a99a450f2b | ||
|
|
6fa1f37506 | ||
|
|
544382df5e | ||
|
|
784f00ef6d | ||
|
|
96d7446f70 | ||
|
|
99ea78fb55 | ||
|
|
8eef4cdc28 | ||
|
|
0f102f02c3 | ||
|
|
a33a0670f6 | ||
|
|
14c9ff46d1 | ||
|
|
1865f7decf | ||
|
|
a608621476 | ||
|
|
00514999ff | ||
|
|
b3b597fef6 | ||
|
|
bf17144591 | ||
|
|
09e110525f | ||
|
|
40f0dbb64d | ||
|
|
3b19e96ae7 | ||
|
|
78a17ad54c | ||
|
|
a8e6b491e2 | ||
|
|
cea541ca46 | ||
|
|
873ffc1042 | ||
|
|
83273ad997 | ||
|
|
d18d63c69d | ||
|
|
c3e865e8d0 | ||
|
|
a7755cb313 | ||
|
|
3490f3456f | ||
|
|
0a1d0693e1 | ||
|
|
fd330b4b4b | ||
|
|
d4e9fc08e0 | ||
|
|
3626f2f5e1 | ||
|
|
e64712cfa5 | ||
|
|
3e3118f85c | ||
|
|
592598a333 | ||
|
|
5ad21341c9 | ||
|
|
6e08caa091 | ||
|
|
7e259d8b0f | ||
|
|
e84f747464 | ||
|
|
998cd43fe6 | ||
|
|
4bc7eebe61 | ||
|
|
2e3b34e79b | ||
|
|
e7574698eb | ||
|
|
801a9e5f6f | ||
|
|
4e5fbe6c99 | ||
|
|
1a449fa49e | ||
|
|
6bf742c759 | ||
|
|
ef3093bc23 | ||
|
|
16851389ea | ||
|
|
c269524b2f | ||
|
|
f6eef14313 | ||
|
|
32716adaa3 | ||
|
|
5e98b7f4c0 | ||
|
|
3f2589c11f | ||
|
|
e3b99694d6 | ||
|
|
9d42dc349c | ||
|
|
482f1ee1d3 | ||
|
|
2f39274a66 | ||
|
|
2fc174f532 | ||
|
|
dba85f4d6f | ||
|
|
555fa26147 | ||
|
|
e05c0cd87e | ||
|
|
25c17ebf4e | ||
|
|
87b12b57dc | ||
|
|
3dc9b71914 |
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.15.1-beta.2"
|
||||
current_version = "0.18.0-beta.0"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
12
.github/workflows/nodejs.yml
vendored
12
.github/workflows/nodejs.yml
vendored
@@ -106,6 +106,18 @@ jobs:
|
||||
python ci/mock_openai.py &
|
||||
cd nodejs/examples
|
||||
npm test
|
||||
- name: Check docs
|
||||
run: |
|
||||
# We run this as part of the job because the binary needs to be built
|
||||
# first to export the types of the native code.
|
||||
set -e
|
||||
npm ci
|
||||
npm run docs
|
||||
if ! git diff --exit-code; then
|
||||
echo "Docs need to be updated"
|
||||
echo "Run 'npm run docs', fix any warnings, and commit the changes."
|
||||
exit 1
|
||||
fi
|
||||
macos:
|
||||
timeout-minutes: 30
|
||||
runs-on: "macos-14"
|
||||
|
||||
48
.github/workflows/python.yml
vendored
48
.github/workflows/python.yml
vendored
@@ -33,13 +33,14 @@ jobs:
|
||||
python-version: "3.12"
|
||||
- name: Install ruff
|
||||
run: |
|
||||
pip install ruff==0.8.4
|
||||
pip install ruff==0.9.9
|
||||
- name: Format check
|
||||
run: ruff format --check .
|
||||
- name: Lint
|
||||
run: ruff check .
|
||||
doctest:
|
||||
name: "Doctest"
|
||||
|
||||
type-check:
|
||||
name: "Type Check"
|
||||
timeout-minutes: 30
|
||||
runs-on: "ubuntu-22.04"
|
||||
defaults:
|
||||
@@ -54,7 +55,36 @@ jobs:
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.11"
|
||||
python-version: "3.12"
|
||||
- name: Install protobuf compiler
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y protobuf-compiler
|
||||
pip install toml
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python ../ci/parse_requirements.py pyproject.toml --extras dev,tests,embeddings > requirements.txt
|
||||
pip install -r requirements.txt
|
||||
- name: Run pyright
|
||||
run: pyright
|
||||
|
||||
doctest:
|
||||
name: "Doctest"
|
||||
timeout-minutes: 30
|
||||
runs-on: "ubuntu-24.04"
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
working-directory: python
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
lfs: true
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.12"
|
||||
cache: "pip"
|
||||
- name: Install protobuf
|
||||
run: |
|
||||
@@ -75,8 +105,8 @@ jobs:
|
||||
timeout-minutes: 30
|
||||
strategy:
|
||||
matrix:
|
||||
python-minor-version: ["9", "11"]
|
||||
runs-on: "ubuntu-22.04"
|
||||
python-minor-version: ["9", "12"]
|
||||
runs-on: "ubuntu-24.04"
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
@@ -127,7 +157,7 @@ jobs:
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.11"
|
||||
python-version: "3.12"
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
with:
|
||||
workspaces: python
|
||||
@@ -157,7 +187,7 @@ jobs:
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.11"
|
||||
python-version: "3.12"
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
with:
|
||||
workspaces: python
|
||||
@@ -168,7 +198,7 @@ jobs:
|
||||
run: rm -rf target/wheels
|
||||
pydantic1x:
|
||||
timeout-minutes: 30
|
||||
runs-on: "ubuntu-22.04"
|
||||
runs-on: "ubuntu-24.04"
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
|
||||
23
.github/workflows/rust.yml
vendored
23
.github/workflows/rust.yml
vendored
@@ -61,7 +61,12 @@ jobs:
|
||||
CXX: clang++
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
# Remote cargo.lock to force a fresh build
|
||||
# Building without a lock file often requires the latest Rust version since downstream
|
||||
# dependencies may have updated their minimum Rust version.
|
||||
- uses: actions-rust-lang/setup-rust-toolchain@v1
|
||||
with:
|
||||
toolchain: "stable"
|
||||
# Remove cargo.lock to force a fresh build
|
||||
- name: Remove Cargo.lock
|
||||
run: rm -f Cargo.lock
|
||||
- uses: rui314/setup-mold@v1
|
||||
@@ -179,15 +184,17 @@ jobs:
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
- name: Install dependencies
|
||||
- name: Install dependencies (part 1)
|
||||
run: |
|
||||
set -e
|
||||
apk add protobuf-dev curl clang lld llvm19 grep npm bash msitools sed
|
||||
|
||||
curl --proto '=https' --tlsv1.3 -sSf https://raw.githubusercontent.com/rust-lang/rustup/refs/heads/master/rustup-init.sh | sh -s -- -y
|
||||
source $HOME/.cargo/env
|
||||
rustup target add aarch64-pc-windows-msvc
|
||||
|
||||
- name: Install rust
|
||||
uses: actions-rust-lang/setup-rust-toolchain@v1
|
||||
with:
|
||||
target: aarch64-pc-windows-msvc
|
||||
- name: Install dependencies (part 2)
|
||||
run: |
|
||||
set -e
|
||||
mkdir -p sysroot
|
||||
cd sysroot
|
||||
sh ../ci/sysroot-aarch64-pc-windows-msvc.sh
|
||||
@@ -259,7 +266,7 @@ jobs:
|
||||
- name: Install Rust
|
||||
run: |
|
||||
Invoke-WebRequest https://win.rustup.rs/x86_64 -OutFile rustup-init.exe
|
||||
.\rustup-init.exe -y --default-host aarch64-pc-windows-msvc
|
||||
.\rustup-init.exe -y --default-host aarch64-pc-windows-msvc --default-toolchain 1.83.0
|
||||
shell: powershell
|
||||
- name: Add Rust to PATH
|
||||
run: |
|
||||
|
||||
@@ -7,9 +7,15 @@ repos:
|
||||
- id: trailing-whitespace
|
||||
- repo: https://github.com/astral-sh/ruff-pre-commit
|
||||
# Ruff version.
|
||||
rev: v0.2.2
|
||||
rev: v0.9.9
|
||||
hooks:
|
||||
- id: ruff
|
||||
# - repo: https://github.com/RobertCraigie/pyright-python
|
||||
# rev: v1.1.395
|
||||
# hooks:
|
||||
# - id: pyright
|
||||
# args: ["--project", "python"]
|
||||
# additional_dependencies: [pyarrow-stubs]
|
||||
- repo: local
|
||||
hooks:
|
||||
- id: local-biome-check
|
||||
|
||||
1345
Cargo.lock
generated
1345
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
58
Cargo.toml
58
Cargo.toml
@@ -21,44 +21,52 @@ categories = ["database-implementations"]
|
||||
rust-version = "1.78.0"
|
||||
|
||||
[workspace.dependencies]
|
||||
lance = { "version" = "=0.23.0", "features" = [
|
||||
"dynamodb",
|
||||
], git = "https://github.com/lancedb/lance.git", tag = "v0.23.0-beta.3" }
|
||||
lance-io = { version = "=0.23.0", git = "https://github.com/lancedb/lance.git", tag = "v0.23.0-beta.3" }
|
||||
lance-index = { version = "=0.23.0", git = "https://github.com/lancedb/lance.git", tag = "v0.23.0-beta.3" }
|
||||
lance-linalg = { version = "=0.23.0", git = "https://github.com/lancedb/lance.git", tag = "v0.23.0-beta.3" }
|
||||
lance-table = { version = "=0.23.0", git = "https://github.com/lancedb/lance.git", tag = "v0.23.0-beta.3" }
|
||||
lance-testing = { version = "=0.23.0", git = "https://github.com/lancedb/lance.git", tag = "v0.23.0-beta.3" }
|
||||
lance-datafusion = { version = "=0.23.0", git = "https://github.com/lancedb/lance.git", tag = "v0.23.0-beta.3" }
|
||||
lance-encoding = { version = "=0.23.0", git = "https://github.com/lancedb/lance.git", tag = "v0.23.0-beta.3" }
|
||||
lance = { "version" = "=0.24.1", "features" = ["dynamodb"] }
|
||||
lance-io = { version = "=0.24.1" }
|
||||
lance-index = { version = "=0.24.1" }
|
||||
lance-linalg = { version = "=0.24.1" }
|
||||
lance-table = { version = "=0.24.1" }
|
||||
lance-testing = { version = "=0.24.1" }
|
||||
lance-datafusion = { version = "=0.24.1" }
|
||||
lance-encoding = { version = "=0.24.1" }
|
||||
# Note that this one does not include pyarrow
|
||||
arrow = { version = "53.2", optional = false }
|
||||
arrow-array = "53.2"
|
||||
arrow-data = "53.2"
|
||||
arrow-ipc = "53.2"
|
||||
arrow-ord = "53.2"
|
||||
arrow-schema = "53.2"
|
||||
arrow-arith = "53.2"
|
||||
arrow-cast = "53.2"
|
||||
arrow = { version = "54.1", optional = false }
|
||||
arrow-array = "54.1"
|
||||
arrow-data = "54.1"
|
||||
arrow-ipc = "54.1"
|
||||
arrow-ord = "54.1"
|
||||
arrow-schema = "54.1"
|
||||
arrow-arith = "54.1"
|
||||
arrow-cast = "54.1"
|
||||
async-trait = "0"
|
||||
chrono = "0.4.35"
|
||||
datafusion-common = "44.0"
|
||||
datafusion-physical-plan = "44.0"
|
||||
env_logger = "0.10"
|
||||
datafusion = { version = "45.0", default-features = false }
|
||||
datafusion-catalog = "45.0"
|
||||
datafusion-common = { version = "45.0", default-features = false }
|
||||
datafusion-execution = "45.0"
|
||||
datafusion-expr = "45.0"
|
||||
datafusion-physical-plan = "45.0"
|
||||
env_logger = "0.11"
|
||||
half = { "version" = "=2.4.1", default-features = false, features = [
|
||||
"num-traits",
|
||||
] }
|
||||
futures = "0"
|
||||
log = "0.4"
|
||||
moka = { version = "0.11", features = ["future"] }
|
||||
object_store = "0.10.2"
|
||||
moka = { version = "0.12", features = ["future"] }
|
||||
object_store = "0.11.0"
|
||||
pin-project = "1.0.7"
|
||||
snafu = "0.7.4"
|
||||
snafu = "0.8"
|
||||
url = "2"
|
||||
num-traits = "0.2"
|
||||
rand = "0.8"
|
||||
regex = "1.10"
|
||||
lazy_static = "1"
|
||||
semver = "1.0.25"
|
||||
|
||||
# Temporary pins to work around downstream issues
|
||||
# https://github.com/apache/arrow-rs/commit/2fddf85afcd20110ce783ed5b4cdeb82293da30b
|
||||
chrono = "=0.4.39"
|
||||
# https://github.com/RustCrypto/formats/issues/1684
|
||||
base64ct = "=1.6.0"
|
||||
|
||||
# Workaround for: https://github.com/eira-fransham/crunchy/issues/13
|
||||
crunchy = "=0.2.2"
|
||||
|
||||
41
ci/parse_requirements.py
Normal file
41
ci/parse_requirements.py
Normal file
@@ -0,0 +1,41 @@
|
||||
import argparse
|
||||
import toml
|
||||
|
||||
|
||||
def parse_dependencies(pyproject_path, extras=None):
|
||||
with open(pyproject_path, "r") as file:
|
||||
pyproject = toml.load(file)
|
||||
|
||||
dependencies = pyproject.get("project", {}).get("dependencies", [])
|
||||
for dependency in dependencies:
|
||||
print(dependency)
|
||||
|
||||
optional_dependencies = pyproject.get("project", {}).get(
|
||||
"optional-dependencies", {}
|
||||
)
|
||||
|
||||
if extras:
|
||||
for extra in extras.split(","):
|
||||
for dep in optional_dependencies.get(extra, []):
|
||||
print(dep)
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Generate requirements.txt from pyproject.toml"
|
||||
)
|
||||
parser.add_argument("path", type=str, help="Path to pyproject.toml")
|
||||
parser.add_argument(
|
||||
"--extras",
|
||||
type=str,
|
||||
help="Comma-separated list of extras to include",
|
||||
default="",
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
parse_dependencies(args.path, args.extras)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -4,6 +4,9 @@ repo_url: https://github.com/lancedb/lancedb
|
||||
edit_uri: https://github.com/lancedb/lancedb/tree/main/docs/src
|
||||
repo_name: lancedb/lancedb
|
||||
docs_dir: src
|
||||
watch:
|
||||
- src
|
||||
- ../python/python
|
||||
|
||||
theme:
|
||||
name: "material"
|
||||
@@ -63,6 +66,7 @@ plugins:
|
||||
- https://arrow.apache.org/docs/objects.inv
|
||||
- https://pandas.pydata.org/docs/objects.inv
|
||||
- https://lancedb.github.io/lance/objects.inv
|
||||
- https://docs.pydantic.dev/latest/objects.inv
|
||||
- mkdocs-jupyter
|
||||
- render_swagger:
|
||||
allow_arbitrary_locations: true
|
||||
@@ -178,6 +182,7 @@ nav:
|
||||
- Imagebind embeddings: embeddings/available_embedding_models/multimodal_embedding_functions/imagebind_embedding.md
|
||||
- Jina Embeddings: embeddings/available_embedding_models/multimodal_embedding_functions/jina_multimodal_embedding.md
|
||||
- User-defined embedding functions: embeddings/custom_embedding_function.md
|
||||
- Variables and secrets: embeddings/variables_and_secrets.md
|
||||
- "Example: Multi-lingual semantic search": notebooks/multi_lingual_example.ipynb
|
||||
- "Example: MultiModal CLIP Embeddings": notebooks/DisappearingEmbeddingFunction.ipynb
|
||||
- 🔌 Integrations:
|
||||
@@ -311,6 +316,7 @@ nav:
|
||||
- Imagebind embeddings: embeddings/available_embedding_models/multimodal_embedding_functions/imagebind_embedding.md
|
||||
- Jina Embeddings: embeddings/available_embedding_models/multimodal_embedding_functions/jina_multimodal_embedding.md
|
||||
- User-defined embedding functions: embeddings/custom_embedding_function.md
|
||||
- Variables and secrets: embeddings/variables_and_secrets.md
|
||||
- "Example: Multi-lingual semantic search": notebooks/multi_lingual_example.ipynb
|
||||
- "Example: MultiModal CLIP Embeddings": notebooks/DisappearingEmbeddingFunction.ipynb
|
||||
- Integrations:
|
||||
@@ -371,6 +377,7 @@ extra_css:
|
||||
|
||||
extra_javascript:
|
||||
- "extra_js/init_ask_ai_widget.js"
|
||||
- "extra_js/reo.js"
|
||||
|
||||
extra:
|
||||
analytics:
|
||||
|
||||
@@ -38,6 +38,13 @@ components:
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
index_name:
|
||||
name: index_name
|
||||
in: path
|
||||
description: name of the index
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
responses:
|
||||
invalid_request:
|
||||
description: Invalid request
|
||||
@@ -485,3 +492,22 @@ paths:
|
||||
$ref: "#/components/responses/unauthorized"
|
||||
"404":
|
||||
$ref: "#/components/responses/not_found"
|
||||
/v1/table/{name}/index/{index_name}/drop/:
|
||||
post:
|
||||
description: Drop an index from the table
|
||||
tags:
|
||||
- Tables
|
||||
summary: Drop an index from the table
|
||||
operationId: dropIndex
|
||||
parameters:
|
||||
- $ref: "#/components/parameters/table_name"
|
||||
- $ref: "#/components/parameters/index_name"
|
||||
responses:
|
||||
"200":
|
||||
description: Index successfully dropped
|
||||
"400":
|
||||
$ref: "#/components/responses/invalid_request"
|
||||
"401":
|
||||
$ref: "#/components/responses/unauthorized"
|
||||
"404":
|
||||
$ref: "#/components/responses/not_found"
|
||||
@@ -3,6 +3,7 @@ import * as vectordb from "vectordb";
|
||||
// --8<-- [end:import]
|
||||
|
||||
(async () => {
|
||||
console.log("ann_indexes.ts: start");
|
||||
// --8<-- [start:ingest]
|
||||
const db = await vectordb.connect("data/sample-lancedb");
|
||||
|
||||
@@ -49,5 +50,5 @@ import * as vectordb from "vectordb";
|
||||
.execute();
|
||||
// --8<-- [end:search3]
|
||||
|
||||
console.log("Ann indexes: done");
|
||||
console.log("ann_indexes.ts: done");
|
||||
})();
|
||||
|
||||
@@ -133,11 +133,20 @@ recommend switching to stable releases.
|
||||
## Connect to a database
|
||||
|
||||
=== "Python"
|
||||
=== "Sync API"
|
||||
|
||||
```python
|
||||
--8<-- "python/python/tests/docs/test_basic.py:imports"
|
||||
--8<-- "python/python/tests/docs/test_basic.py:connect"
|
||||
|
||||
--8<-- "python/python/tests/docs/test_basic.py:set_uri"
|
||||
--8<-- "python/python/tests/docs/test_basic.py:connect"
|
||||
```
|
||||
=== "Async API"
|
||||
|
||||
```python
|
||||
--8<-- "python/python/tests/docs/test_basic.py:imports"
|
||||
|
||||
--8<-- "python/python/tests/docs/test_basic.py:set_uri"
|
||||
--8<-- "python/python/tests/docs/test_basic.py:connect_async"
|
||||
```
|
||||
|
||||
@@ -183,19 +192,31 @@ table.
|
||||
|
||||
=== "Python"
|
||||
|
||||
```python
|
||||
--8<-- "python/python/tests/docs/test_basic.py:create_table"
|
||||
--8<-- "python/python/tests/docs/test_basic.py:create_table_async"
|
||||
```
|
||||
|
||||
If the table already exists, LanceDB will raise an error by default.
|
||||
If you want to overwrite the table, you can pass in `mode="overwrite"`
|
||||
to the `create_table` method.
|
||||
|
||||
=== "Sync API"
|
||||
|
||||
```python
|
||||
--8<-- "python/python/tests/docs/test_basic.py:create_table"
|
||||
```
|
||||
|
||||
You can also pass in a pandas DataFrame directly:
|
||||
|
||||
```python
|
||||
--8<-- "python/python/tests/docs/test_basic.py:create_table_pandas"
|
||||
```
|
||||
|
||||
=== "Async API"
|
||||
|
||||
```python
|
||||
--8<-- "python/python/tests/docs/test_basic.py:create_table_async"
|
||||
```
|
||||
|
||||
You can also pass in a pandas DataFrame directly:
|
||||
|
||||
```python
|
||||
--8<-- "python/python/tests/docs/test_basic.py:create_table_async_pandas"
|
||||
```
|
||||
|
||||
@@ -247,8 +268,14 @@ similar to a `CREATE TABLE` statement in SQL.
|
||||
|
||||
=== "Python"
|
||||
|
||||
=== "Sync API"
|
||||
|
||||
```python
|
||||
--8<-- "python/python/tests/docs/test_basic.py:create_empty_table"
|
||||
```
|
||||
=== "Async API"
|
||||
|
||||
```python
|
||||
--8<-- "python/python/tests/docs/test_basic.py:create_empty_table_async"
|
||||
```
|
||||
|
||||
@@ -281,8 +308,14 @@ Once created, you can open a table as follows:
|
||||
|
||||
=== "Python"
|
||||
|
||||
=== "Sync API"
|
||||
|
||||
```python
|
||||
--8<-- "python/python/tests/docs/test_basic.py:open_table"
|
||||
```
|
||||
=== "Async API"
|
||||
|
||||
```python
|
||||
--8<-- "python/python/tests/docs/test_basic.py:open_table_async"
|
||||
```
|
||||
|
||||
@@ -310,8 +343,14 @@ If you forget the name of your table, you can always get a listing of all table
|
||||
|
||||
=== "Python"
|
||||
|
||||
=== "Sync API"
|
||||
|
||||
```python
|
||||
--8<-- "python/python/tests/docs/test_basic.py:table_names"
|
||||
```
|
||||
=== "Async API"
|
||||
|
||||
```python
|
||||
--8<-- "python/python/tests/docs/test_basic.py:table_names_async"
|
||||
```
|
||||
|
||||
@@ -340,8 +379,14 @@ After a table has been created, you can always add more data to it as follows:
|
||||
|
||||
=== "Python"
|
||||
|
||||
=== "Sync API"
|
||||
|
||||
```python
|
||||
--8<-- "python/python/tests/docs/test_basic.py:add_data"
|
||||
```
|
||||
=== "Async API"
|
||||
|
||||
```python
|
||||
--8<-- "python/python/tests/docs/test_basic.py:add_data_async"
|
||||
```
|
||||
|
||||
@@ -370,8 +415,14 @@ Once you've embedded the query, you can find its nearest neighbors as follows:
|
||||
|
||||
=== "Python"
|
||||
|
||||
=== "Sync API"
|
||||
|
||||
```python
|
||||
--8<-- "python/python/tests/docs/test_basic.py:vector_search"
|
||||
```
|
||||
=== "Async API"
|
||||
|
||||
```python
|
||||
--8<-- "python/python/tests/docs/test_basic.py:vector_search_async"
|
||||
```
|
||||
|
||||
@@ -412,8 +463,14 @@ LanceDB allows you to create an ANN index on a table as follows:
|
||||
|
||||
=== "Python"
|
||||
|
||||
```py
|
||||
=== "Sync API"
|
||||
|
||||
```python
|
||||
--8<-- "python/python/tests/docs/test_basic.py:create_index"
|
||||
```
|
||||
=== "Async API"
|
||||
|
||||
```python
|
||||
--8<-- "python/python/tests/docs/test_basic.py:create_index_async"
|
||||
```
|
||||
|
||||
@@ -451,8 +508,14 @@ This can delete any number of rows that match the filter.
|
||||
|
||||
=== "Python"
|
||||
|
||||
=== "Sync API"
|
||||
|
||||
```python
|
||||
--8<-- "python/python/tests/docs/test_basic.py:delete_rows"
|
||||
```
|
||||
=== "Async API"
|
||||
|
||||
```python
|
||||
--8<-- "python/python/tests/docs/test_basic.py:delete_rows_async"
|
||||
```
|
||||
|
||||
@@ -483,7 +546,10 @@ simple or complex as needed. To see what expressions are supported, see the
|
||||
|
||||
=== "Python"
|
||||
|
||||
=== "Sync API"
|
||||
Read more: [lancedb.table.Table.delete][]
|
||||
=== "Async API"
|
||||
Read more: [lancedb.table.AsyncTable.delete][]
|
||||
|
||||
=== "Typescript[^1]"
|
||||
|
||||
@@ -505,8 +571,14 @@ Use the `drop_table()` method on the database to remove a table.
|
||||
|
||||
=== "Python"
|
||||
|
||||
=== "Sync API"
|
||||
|
||||
```python
|
||||
--8<-- "python/python/tests/docs/test_basic.py:drop_table"
|
||||
```
|
||||
=== "Async API"
|
||||
|
||||
```python
|
||||
--8<-- "python/python/tests/docs/test_basic.py:drop_table_async"
|
||||
```
|
||||
|
||||
@@ -543,10 +615,17 @@ You can use the embedding API when working with embedding models. It automatical
|
||||
|
||||
=== "Python"
|
||||
|
||||
=== "Sync API"
|
||||
|
||||
```python
|
||||
--8<-- "python/python/tests/docs/test_embeddings_optional.py:imports"
|
||||
|
||||
--8<-- "python/python/tests/docs/test_embeddings_optional.py:openai_embeddings"
|
||||
```
|
||||
=== "Async API"
|
||||
|
||||
Coming soon to the async API.
|
||||
https://github.com/lancedb/lancedb/issues/1938
|
||||
|
||||
=== "Typescript[^1]"
|
||||
|
||||
|
||||
@@ -107,7 +107,6 @@ const example = async () => {
|
||||
// --8<-- [start:search]
|
||||
const query = await tbl.search([100, 100]).limit(2).execute();
|
||||
// --8<-- [end:search]
|
||||
console.log(query);
|
||||
|
||||
// --8<-- [start:delete]
|
||||
await tbl.delete('item = "fizz"');
|
||||
@@ -119,8 +118,9 @@ const example = async () => {
|
||||
};
|
||||
|
||||
async function main() {
|
||||
console.log("basic_legacy.ts: start");
|
||||
await example();
|
||||
console.log("Basic example: done");
|
||||
console.log("basic_legacy.ts: done");
|
||||
}
|
||||
|
||||
main();
|
||||
|
||||
@@ -55,6 +55,14 @@ Let's implement `SentenceTransformerEmbeddings` class. All you need to do is imp
|
||||
|
||||
This is a stripped down version of our implementation of `SentenceTransformerEmbeddings` that removes certain optimizations and default settings.
|
||||
|
||||
!!! danger "Use sensitive keys to prevent leaking secrets"
|
||||
To prevent leaking secrets, such as API keys, you should add any sensitive
|
||||
parameters of an embedding function to the output of the
|
||||
[sensitive_keys()][lancedb.embeddings.base.EmbeddingFunction.sensitive_keys] /
|
||||
[getSensitiveKeys()](../../js/namespaces/embedding/classes/EmbeddingFunction/#getsensitivekeys)
|
||||
method. This prevents users from accidentally instantiating the embedding
|
||||
function with hard-coded secrets.
|
||||
|
||||
Now you can use this embedding function to create your table schema and that's it! you can then ingest data and run queries without manually vectorizing the inputs.
|
||||
|
||||
=== "Python"
|
||||
|
||||
53
docs/src/embeddings/variables_and_secrets.md
Normal file
53
docs/src/embeddings/variables_and_secrets.md
Normal file
@@ -0,0 +1,53 @@
|
||||
# Variable and Secrets
|
||||
|
||||
Most embedding configuration options are saved in the table's metadata. However,
|
||||
this isn't always appropriate. For example, API keys should never be stored in the
|
||||
metadata. Additionally, other configuration options might be best set at runtime,
|
||||
such as the `device` configuration that controls whether to use GPU or CPU for
|
||||
inference. If you hardcoded this to GPU, you wouldn't be able to run the code on
|
||||
a server without one.
|
||||
|
||||
To handle these cases, you can set variables on the embedding registry and
|
||||
reference them in the embedding configuration. These variables will be available
|
||||
during the runtime of your program, but not saved in the table's metadata. When
|
||||
the table is loaded from a different process, the variables must be set again.
|
||||
|
||||
To set a variable, use the `set_var()` / `setVar()` method on the embedding registry.
|
||||
To reference a variable, use the syntax `$env:VARIABLE_NAME`. If there is a default
|
||||
value, you can use the syntax `$env:VARIABLE_NAME:DEFAULT_VALUE`.
|
||||
|
||||
## Using variables to set secrets
|
||||
|
||||
Sensitive configuration, such as API keys, must either be set as environment
|
||||
variables or using variables on the embedding registry. If you pass in a hardcoded
|
||||
value, LanceDB will raise an error. Instead, if you want to set an API key via
|
||||
configuration, use a variable:
|
||||
|
||||
=== "Python"
|
||||
|
||||
```python
|
||||
--8<-- "python/python/tests/docs/test_embeddings_optional.py:register_secret"
|
||||
```
|
||||
|
||||
=== "Typescript"
|
||||
|
||||
```typescript
|
||||
--8<-- "nodejs/examples/embedding.test.ts:register_secret"
|
||||
```
|
||||
|
||||
## Using variables to set the device parameter
|
||||
|
||||
Many embedding functions that run locally have a `device` parameter that controls
|
||||
whether to use GPU or CPU for inference. Because not all computers have a GPU,
|
||||
it's helpful to be able to set the `device` parameter at runtime, rather than
|
||||
have it hard coded in the embedding configuration. To make it work even if the
|
||||
variable isn't set, you could provide a default value of `cpu` in the embedding
|
||||
configuration.
|
||||
|
||||
Some embedding libraries even have a method to detect which devices are available,
|
||||
which could be used to dynamically set the device at runtime. For example, in Python
|
||||
you can check if a CUDA GPU is available using `torch.cuda.is_available()`.
|
||||
|
||||
```python
|
||||
--8<-- "python/python/tests/docs/test_embeddings_optional.py:register_device"
|
||||
```
|
||||
1
docs/src/extra_js/reo.js
Normal file
1
docs/src/extra_js/reo.js
Normal file
@@ -0,0 +1 @@
|
||||
!function(){var e,t,n;e="9627b71b382d201",t=function(){Reo.init({clientID:"9627b71b382d201"})},(n=document.createElement("script")).src="https://static.reo.dev/"+e+"/reo.js",n.defer=!0,n.onload=t,document.head.appendChild(n)}();
|
||||
@@ -601,6 +601,38 @@ After a table has been created, you can always add more data to it using the `ad
|
||||
)
|
||||
```
|
||||
|
||||
## Upserting into a table
|
||||
|
||||
Upserting lets you insert new rows or update existing rows in a table. To upsert
|
||||
in LanceDB, use the merge insert API.
|
||||
|
||||
=== "Python"
|
||||
|
||||
=== "Sync API"
|
||||
|
||||
```python
|
||||
--8<-- "python/python/tests/docs/test_merge_insert.py:upsert_basic"
|
||||
```
|
||||
**API Reference**: [lancedb.table.Table.merge_insert][]
|
||||
|
||||
=== "Async API"
|
||||
|
||||
```python
|
||||
--8<-- "python/python/tests/docs/test_merge_insert.py:upsert_basic_async"
|
||||
```
|
||||
**API Reference**: [lancedb.table.AsyncTable.merge_insert][]
|
||||
|
||||
=== "Typescript[^1]"
|
||||
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```typescript
|
||||
--8<-- "nodejs/examples/merge_insert.test.ts:upsert_basic"
|
||||
```
|
||||
**API Reference**: [lancedb.Table.mergeInsert](../js/classes/Table.md/#mergeInsert)
|
||||
|
||||
Read more in the guide on [merge insert](tables/merge_insert.md).
|
||||
|
||||
## Deleting from a table
|
||||
|
||||
Use the `delete()` method on tables to delete rows from a table. To choose which rows to delete, provide a filter that matches on the metadata columns. This can delete any number of rows that match the filter.
|
||||
|
||||
135
docs/src/guides/tables/merge_insert.md
Normal file
135
docs/src/guides/tables/merge_insert.md
Normal file
@@ -0,0 +1,135 @@
|
||||
The merge insert command is a flexible API that can be used to perform:
|
||||
|
||||
1. Upsert
|
||||
2. Insert-if-not-exists
|
||||
3. Replace range
|
||||
|
||||
It works by joining the input data with the target table on a key you provide.
|
||||
Often this key is a unique row id key. You can then specify what to do when
|
||||
there is a match and when there is not a match. For example, for upsert you want
|
||||
to update if the row has a match and insert if the row doesn't have a match.
|
||||
Whereas for insert-if-not-exists you only want to insert if the row doesn't have
|
||||
a match.
|
||||
|
||||
You can also read more in the API reference:
|
||||
|
||||
* Python
|
||||
* Sync: [lancedb.table.Table.merge_insert][]
|
||||
* Async: [lancedb.table.AsyncTable.merge_insert][]
|
||||
* Typescript: [lancedb.Table.mergeInsert](../../js/classes/Table.md/#mergeinsert)
|
||||
|
||||
!!! tip "Use scalar indices to speed up merge insert"
|
||||
|
||||
The merge insert command needs to perform a join between the input data and the
|
||||
target table on the `on` key you provide. This requires scanning that entire
|
||||
column, which can be expensive for large tables. To speed up this operation,
|
||||
you can create a scalar index on the `on` column, which will allow LanceDB to
|
||||
find matches without having to scan the whole tables.
|
||||
|
||||
Read more about scalar indices in [Building a Scalar Index](../scalar_index.md)
|
||||
guide.
|
||||
|
||||
!!! info "Embedding Functions"
|
||||
|
||||
Like the create table and add APIs, the merge insert API will automatically
|
||||
compute embeddings if the table has a embedding definition in its schema.
|
||||
If the input data doesn't contain the source column, or the vector column
|
||||
is already filled, then the embeddings won't be computed. See the
|
||||
[Embedding Functions](../../embeddings/embedding_functions.md) guide for more
|
||||
information.
|
||||
|
||||
## Upsert
|
||||
|
||||
Upsert updates rows if they exist and inserts them if they don't. To do this
|
||||
with merge insert, enable both `when_matched_update_all()` and
|
||||
`when_not_matched_insert_all()`.
|
||||
|
||||
=== "Python"
|
||||
|
||||
=== "Sync API"
|
||||
|
||||
```python
|
||||
--8<-- "python/python/tests/docs/test_merge_insert.py:upsert_basic"
|
||||
```
|
||||
|
||||
=== "Async API"
|
||||
|
||||
```python
|
||||
--8<-- "python/python/tests/docs/test_merge_insert.py:upsert_basic_async"
|
||||
```
|
||||
|
||||
=== "Typescript"
|
||||
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```typescript
|
||||
--8<-- "nodejs/examples/merge_insert.test.ts:upsert_basic"
|
||||
```
|
||||
|
||||
!!! note "Providing subsets of columns"
|
||||
|
||||
If a column is nullable, it can be omitted from input data and it will be
|
||||
considered `null`. Columns can also be provided in any order.
|
||||
|
||||
## Insert-if-not-exists
|
||||
|
||||
To avoid inserting duplicate rows, you can use the insert-if-not-exists command.
|
||||
This will only insert rows that do not have a match in the target table. To do
|
||||
this with merge insert, enable just `when_not_matched_insert_all()`.
|
||||
|
||||
|
||||
=== "Python"
|
||||
|
||||
=== "Sync API"
|
||||
|
||||
```python
|
||||
--8<-- "python/python/tests/docs/test_merge_insert.py:insert_if_not_exists"
|
||||
```
|
||||
|
||||
=== "Async API"
|
||||
|
||||
```python
|
||||
--8<-- "python/python/tests/docs/test_merge_insert.py:insert_if_not_exists_async"
|
||||
```
|
||||
|
||||
=== "Typescript"
|
||||
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```typescript
|
||||
--8<-- "nodejs/examples/merge_insert.test.ts:insert_if_not_exists"
|
||||
```
|
||||
|
||||
|
||||
## Replace range
|
||||
|
||||
You can also replace a range of rows in the target table with the input data.
|
||||
For example, if you have a table of document chunks, where each chunk has
|
||||
both a `doc_id` and a `chunk_id`, you can replace all chunks for a given
|
||||
`doc_id` with updated chunks. This can be tricky otherwise because if you
|
||||
try to use upsert when the new data has fewer chunks you will end up with
|
||||
extra chunks. To avoid this, add another clause to delete any chunks for
|
||||
the document that are not in the new data, with
|
||||
`when_not_matched_by_source_delete`.
|
||||
|
||||
=== "Python"
|
||||
|
||||
=== "Sync API"
|
||||
|
||||
```python
|
||||
--8<-- "python/python/tests/docs/test_merge_insert.py:replace_range"
|
||||
```
|
||||
|
||||
=== "Async API"
|
||||
|
||||
```python
|
||||
--8<-- "python/python/tests/docs/test_merge_insert.py:replace_range_async"
|
||||
```
|
||||
|
||||
=== "Typescript"
|
||||
|
||||
=== "@lancedb/lancedb"
|
||||
|
||||
```typescript
|
||||
--8<-- "nodejs/examples/merge_insert.test.ts:replace_range"
|
||||
```
|
||||
@@ -36,7 +36,7 @@ const results = await table.vectorSearch([0.1, 0.3]).limit(20).toArray();
|
||||
console.log(results);
|
||||
```
|
||||
|
||||
The [quickstart](../basic.md) contains a more complete example.
|
||||
The [quickstart](https://lancedb.github.io/lancedb/basic/) contains a more complete example.
|
||||
|
||||
## Development
|
||||
|
||||
|
||||
@@ -23,18 +23,6 @@ be closed when they are garbage collected.
|
||||
Any created tables are independent and will continue to work even if
|
||||
the underlying connection has been closed.
|
||||
|
||||
## Constructors
|
||||
|
||||
### new Connection()
|
||||
|
||||
```ts
|
||||
new Connection(): Connection
|
||||
```
|
||||
|
||||
#### Returns
|
||||
|
||||
[`Connection`](Connection.md)
|
||||
|
||||
## Methods
|
||||
|
||||
### close()
|
||||
@@ -71,7 +59,7 @@ Creates a new empty Table
|
||||
* **name**: `string`
|
||||
The name of the table.
|
||||
|
||||
* **schema**: `SchemaLike`
|
||||
* **schema**: [`SchemaLike`](../type-aliases/SchemaLike.md)
|
||||
The schema of the table
|
||||
|
||||
* **options?**: `Partial`<[`CreateTableOptions`](../interfaces/CreateTableOptions.md)>
|
||||
@@ -117,7 +105,7 @@ Creates a new Table and initialize it with new data.
|
||||
* **name**: `string`
|
||||
The name of the table.
|
||||
|
||||
* **data**: `TableLike` \| `Record`<`string`, `unknown`>[]
|
||||
* **data**: [`TableLike`](../type-aliases/TableLike.md) \| `Record`<`string`, `unknown`>[]
|
||||
Non-empty Array of Records
|
||||
to be inserted into the table
|
||||
|
||||
@@ -143,6 +131,20 @@ Return a brief description of the connection
|
||||
|
||||
***
|
||||
|
||||
### dropAllTables()
|
||||
|
||||
```ts
|
||||
abstract dropAllTables(): Promise<void>
|
||||
```
|
||||
|
||||
Drop all tables in the database.
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<`void`>
|
||||
|
||||
***
|
||||
|
||||
### dropTable()
|
||||
|
||||
```ts
|
||||
@@ -189,7 +191,7 @@ Open a table in the database.
|
||||
* **name**: `string`
|
||||
The name of the table
|
||||
|
||||
* **options?**: `Partial`<`OpenTableOptions`>
|
||||
* **options?**: `Partial`<[`OpenTableOptions`](../interfaces/OpenTableOptions.md)>
|
||||
|
||||
#### Returns
|
||||
|
||||
|
||||
@@ -72,11 +72,9 @@ The results of a full text search are ordered by relevance measured by BM25.
|
||||
|
||||
You can combine filters with full text search.
|
||||
|
||||
For now, the full text search index only supports English, and doesn't support phrase search.
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **options?**: `Partial`<`FtsOptions`>
|
||||
* **options?**: `Partial`<[`FtsOptions`](../interfaces/FtsOptions.md)>
|
||||
|
||||
#### Returns
|
||||
|
||||
@@ -98,7 +96,7 @@ the vectors.
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **options?**: `Partial`<`HnswPqOptions`>
|
||||
* **options?**: `Partial`<[`HnswPqOptions`](../interfaces/HnswPqOptions.md)>
|
||||
|
||||
#### Returns
|
||||
|
||||
@@ -120,7 +118,7 @@ the vectors.
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **options?**: `Partial`<`HnswSqOptions`>
|
||||
* **options?**: `Partial`<[`HnswSqOptions`](../interfaces/HnswSqOptions.md)>
|
||||
|
||||
#### Returns
|
||||
|
||||
|
||||
126
docs/src/js/classes/MergeInsertBuilder.md
Normal file
126
docs/src/js/classes/MergeInsertBuilder.md
Normal file
@@ -0,0 +1,126 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / MergeInsertBuilder
|
||||
|
||||
# Class: MergeInsertBuilder
|
||||
|
||||
A builder used to create and run a merge insert operation
|
||||
|
||||
## Constructors
|
||||
|
||||
### new MergeInsertBuilder()
|
||||
|
||||
```ts
|
||||
new MergeInsertBuilder(native, schema): MergeInsertBuilder
|
||||
```
|
||||
|
||||
Construct a MergeInsertBuilder. __Internal use only.__
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **native**: `NativeMergeInsertBuilder`
|
||||
|
||||
* **schema**: `Schema`<`any`> \| `Promise`<`Schema`<`any`>>
|
||||
|
||||
#### Returns
|
||||
|
||||
[`MergeInsertBuilder`](MergeInsertBuilder.md)
|
||||
|
||||
## Methods
|
||||
|
||||
### execute()
|
||||
|
||||
```ts
|
||||
execute(data): Promise<void>
|
||||
```
|
||||
|
||||
Executes the merge insert operation
|
||||
|
||||
Nothing is returned but the `Table` is updated
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **data**: [`Data`](../type-aliases/Data.md)
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<`void`>
|
||||
|
||||
***
|
||||
|
||||
### whenMatchedUpdateAll()
|
||||
|
||||
```ts
|
||||
whenMatchedUpdateAll(options?): MergeInsertBuilder
|
||||
```
|
||||
|
||||
Rows that exist in both the source table (new data) and
|
||||
the target table (old data) will be updated, replacing
|
||||
the old row with the corresponding matching row.
|
||||
|
||||
If there are multiple matches then the behavior is undefined.
|
||||
Currently this causes multiple copies of the row to be created
|
||||
but that behavior is subject to change.
|
||||
|
||||
An optional condition may be specified. If it is, then only
|
||||
matched rows that satisfy the condtion will be updated. Any
|
||||
rows that do not satisfy the condition will be left as they
|
||||
are. Failing to satisfy the condition does not cause a
|
||||
"matched row" to become a "not matched" row.
|
||||
|
||||
The condition should be an SQL string. Use the prefix
|
||||
target. to refer to rows in the target table (old data)
|
||||
and the prefix source. to refer to rows in the source
|
||||
table (new data).
|
||||
|
||||
For example, "target.last_update < source.last_update"
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **options?**
|
||||
|
||||
* **options.where?**: `string`
|
||||
|
||||
#### Returns
|
||||
|
||||
[`MergeInsertBuilder`](MergeInsertBuilder.md)
|
||||
|
||||
***
|
||||
|
||||
### whenNotMatchedBySourceDelete()
|
||||
|
||||
```ts
|
||||
whenNotMatchedBySourceDelete(options?): MergeInsertBuilder
|
||||
```
|
||||
|
||||
Rows that exist only in the target table (old data) will be
|
||||
deleted. An optional condition can be provided to limit what
|
||||
data is deleted.
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **options?**
|
||||
|
||||
* **options.where?**: `string`
|
||||
An optional condition to limit what data is deleted
|
||||
|
||||
#### Returns
|
||||
|
||||
[`MergeInsertBuilder`](MergeInsertBuilder.md)
|
||||
|
||||
***
|
||||
|
||||
### whenNotMatchedInsertAll()
|
||||
|
||||
```ts
|
||||
whenNotMatchedInsertAll(): MergeInsertBuilder
|
||||
```
|
||||
|
||||
Rows that exist only in the source table (new data) should
|
||||
be inserted into the target table.
|
||||
|
||||
#### Returns
|
||||
|
||||
[`MergeInsertBuilder`](MergeInsertBuilder.md)
|
||||
@@ -8,30 +8,14 @@
|
||||
|
||||
A builder for LanceDB queries.
|
||||
|
||||
## See
|
||||
|
||||
[Table#query](Table.md#query), [Table#search](Table.md#search)
|
||||
|
||||
## Extends
|
||||
|
||||
- [`QueryBase`](QueryBase.md)<`NativeQuery`>
|
||||
|
||||
## Constructors
|
||||
|
||||
### new Query()
|
||||
|
||||
```ts
|
||||
new Query(tbl): Query
|
||||
```
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **tbl**: `Table`
|
||||
|
||||
#### Returns
|
||||
|
||||
[`Query`](Query.md)
|
||||
|
||||
#### Overrides
|
||||
|
||||
[`QueryBase`](QueryBase.md).[`constructor`](QueryBase.md#constructors)
|
||||
|
||||
## Properties
|
||||
|
||||
### inner
|
||||
@@ -46,42 +30,6 @@ protected inner: Query | Promise<Query>;
|
||||
|
||||
## Methods
|
||||
|
||||
### \[asyncIterator\]()
|
||||
|
||||
```ts
|
||||
asyncIterator: AsyncIterator<RecordBatch<any>, any, undefined>
|
||||
```
|
||||
|
||||
#### Returns
|
||||
|
||||
`AsyncIterator`<`RecordBatch`<`any`>, `any`, `undefined`>
|
||||
|
||||
#### Inherited from
|
||||
|
||||
[`QueryBase`](QueryBase.md).[`[asyncIterator]`](QueryBase.md#%5Basynciterator%5D)
|
||||
|
||||
***
|
||||
|
||||
### doCall()
|
||||
|
||||
```ts
|
||||
protected doCall(fn): void
|
||||
```
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **fn**
|
||||
|
||||
#### Returns
|
||||
|
||||
`void`
|
||||
|
||||
#### Inherited from
|
||||
|
||||
[`QueryBase`](QueryBase.md).[`doCall`](QueryBase.md#docall)
|
||||
|
||||
***
|
||||
|
||||
### execute()
|
||||
|
||||
```ts
|
||||
@@ -92,7 +40,7 @@ Execute the query and return the results as an
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **options?**: `Partial`<`QueryExecutionOptions`>
|
||||
* **options?**: `Partial`<[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)>
|
||||
|
||||
#### Returns
|
||||
|
||||
@@ -161,7 +109,7 @@ fastSearch(): this
|
||||
Skip searching un-indexed data. This can make search faster, but will miss
|
||||
any data that is not yet indexed.
|
||||
|
||||
Use lancedb.Table#optimize to index all un-indexed data.
|
||||
Use [Table#optimize](Table.md#optimize) to index all un-indexed data.
|
||||
|
||||
#### Returns
|
||||
|
||||
@@ -189,7 +137,7 @@ A filter statement to be applied to this query.
|
||||
|
||||
`this`
|
||||
|
||||
#### Alias
|
||||
#### See
|
||||
|
||||
where
|
||||
|
||||
@@ -213,7 +161,7 @@ fullTextSearch(query, options?): this
|
||||
|
||||
* **query**: `string`
|
||||
|
||||
* **options?**: `Partial`<`FullTextSearchOptions`>
|
||||
* **options?**: `Partial`<[`FullTextSearchOptions`](../interfaces/FullTextSearchOptions.md)>
|
||||
|
||||
#### Returns
|
||||
|
||||
@@ -250,26 +198,6 @@ called then every valid row from the table will be returned.
|
||||
|
||||
***
|
||||
|
||||
### nativeExecute()
|
||||
|
||||
```ts
|
||||
protected nativeExecute(options?): Promise<RecordBatchIterator>
|
||||
```
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **options?**: `Partial`<`QueryExecutionOptions`>
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<`RecordBatchIterator`>
|
||||
|
||||
#### Inherited from
|
||||
|
||||
[`QueryBase`](QueryBase.md).[`nativeExecute`](QueryBase.md#nativeexecute)
|
||||
|
||||
***
|
||||
|
||||
### nearestTo()
|
||||
|
||||
```ts
|
||||
@@ -294,7 +222,7 @@ If there is more than one vector column you must use
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **vector**: `IntoVector`
|
||||
* **vector**: [`IntoVector`](../type-aliases/IntoVector.md)
|
||||
|
||||
#### Returns
|
||||
|
||||
@@ -427,7 +355,7 @@ Collect the results as an array of objects.
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **options?**: `Partial`<`QueryExecutionOptions`>
|
||||
* **options?**: `Partial`<[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)>
|
||||
|
||||
#### Returns
|
||||
|
||||
@@ -449,7 +377,7 @@ Collect the results as an Arrow
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **options?**: `Partial`<`QueryExecutionOptions`>
|
||||
* **options?**: `Partial`<[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)>
|
||||
|
||||
#### Returns
|
||||
|
||||
|
||||
@@ -8,6 +8,11 @@
|
||||
|
||||
Common methods supported by all query types
|
||||
|
||||
## See
|
||||
|
||||
- [Query](Query.md)
|
||||
- [VectorQuery](VectorQuery.md)
|
||||
|
||||
## Extended by
|
||||
|
||||
- [`Query`](Query.md)
|
||||
@@ -21,22 +26,6 @@ Common methods supported by all query types
|
||||
|
||||
- `AsyncIterable`<`RecordBatch`>
|
||||
|
||||
## Constructors
|
||||
|
||||
### new QueryBase()
|
||||
|
||||
```ts
|
||||
protected new QueryBase<NativeQueryType>(inner): QueryBase<NativeQueryType>
|
||||
```
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **inner**: `NativeQueryType` \| `Promise`<`NativeQueryType`>
|
||||
|
||||
#### Returns
|
||||
|
||||
[`QueryBase`](QueryBase.md)<`NativeQueryType`>
|
||||
|
||||
## Properties
|
||||
|
||||
### inner
|
||||
@@ -47,38 +36,6 @@ protected inner: NativeQueryType | Promise<NativeQueryType>;
|
||||
|
||||
## Methods
|
||||
|
||||
### \[asyncIterator\]()
|
||||
|
||||
```ts
|
||||
asyncIterator: AsyncIterator<RecordBatch<any>, any, undefined>
|
||||
```
|
||||
|
||||
#### Returns
|
||||
|
||||
`AsyncIterator`<`RecordBatch`<`any`>, `any`, `undefined`>
|
||||
|
||||
#### Implementation of
|
||||
|
||||
`AsyncIterable.[asyncIterator]`
|
||||
|
||||
***
|
||||
|
||||
### doCall()
|
||||
|
||||
```ts
|
||||
protected doCall(fn): void
|
||||
```
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **fn**
|
||||
|
||||
#### Returns
|
||||
|
||||
`void`
|
||||
|
||||
***
|
||||
|
||||
### execute()
|
||||
|
||||
```ts
|
||||
@@ -89,7 +46,7 @@ Execute the query and return the results as an
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **options?**: `Partial`<`QueryExecutionOptions`>
|
||||
* **options?**: `Partial`<[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)>
|
||||
|
||||
#### Returns
|
||||
|
||||
@@ -150,7 +107,7 @@ fastSearch(): this
|
||||
Skip searching un-indexed data. This can make search faster, but will miss
|
||||
any data that is not yet indexed.
|
||||
|
||||
Use lancedb.Table#optimize to index all un-indexed data.
|
||||
Use [Table#optimize](Table.md#optimize) to index all un-indexed data.
|
||||
|
||||
#### Returns
|
||||
|
||||
@@ -174,7 +131,7 @@ A filter statement to be applied to this query.
|
||||
|
||||
`this`
|
||||
|
||||
#### Alias
|
||||
#### See
|
||||
|
||||
where
|
||||
|
||||
@@ -194,7 +151,7 @@ fullTextSearch(query, options?): this
|
||||
|
||||
* **query**: `string`
|
||||
|
||||
* **options?**: `Partial`<`FullTextSearchOptions`>
|
||||
* **options?**: `Partial`<[`FullTextSearchOptions`](../interfaces/FullTextSearchOptions.md)>
|
||||
|
||||
#### Returns
|
||||
|
||||
@@ -223,22 +180,6 @@ called then every valid row from the table will be returned.
|
||||
|
||||
***
|
||||
|
||||
### nativeExecute()
|
||||
|
||||
```ts
|
||||
protected nativeExecute(options?): Promise<RecordBatchIterator>
|
||||
```
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **options?**: `Partial`<`QueryExecutionOptions`>
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<`RecordBatchIterator`>
|
||||
|
||||
***
|
||||
|
||||
### offset()
|
||||
|
||||
```ts
|
||||
@@ -314,7 +255,7 @@ Collect the results as an array of objects.
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **options?**: `Partial`<`QueryExecutionOptions`>
|
||||
* **options?**: `Partial`<[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)>
|
||||
|
||||
#### Returns
|
||||
|
||||
@@ -332,7 +273,7 @@ Collect the results as an Arrow
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **options?**: `Partial`<`QueryExecutionOptions`>
|
||||
* **options?**: `Partial`<[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)>
|
||||
|
||||
#### Returns
|
||||
|
||||
|
||||
@@ -14,21 +14,13 @@ will be freed when the Table is garbage collected. To eagerly free the cache yo
|
||||
can call the `close` method. Once the Table is closed, it cannot be used for any
|
||||
further operations.
|
||||
|
||||
Tables are created using the methods [Connection#createTable](Connection.md#createtable)
|
||||
and [Connection#createEmptyTable](Connection.md#createemptytable). Existing tables are opened
|
||||
using [Connection#openTable](Connection.md#opentable).
|
||||
|
||||
Closing a table is optional. It not closed, it will be closed when it is garbage
|
||||
collected.
|
||||
|
||||
## Constructors
|
||||
|
||||
### new Table()
|
||||
|
||||
```ts
|
||||
new Table(): Table
|
||||
```
|
||||
|
||||
#### Returns
|
||||
|
||||
[`Table`](Table.md)
|
||||
|
||||
## Accessors
|
||||
|
||||
### name
|
||||
@@ -216,6 +208,9 @@ Indices on vector columns will speed up vector searches.
|
||||
Indices on scalar columns will speed up filtering (in both
|
||||
vector and non-vector searches)
|
||||
|
||||
We currently don't support custom named indexes.
|
||||
The index name will always be `${column}_idx`.
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **column**: `string`
|
||||
@@ -226,11 +221,6 @@ vector and non-vector searches)
|
||||
|
||||
`Promise`<`void`>
|
||||
|
||||
#### Note
|
||||
|
||||
We currently don't support custom named indexes,
|
||||
The index name will always be `${column}_idx`
|
||||
|
||||
#### Examples
|
||||
|
||||
```ts
|
||||
@@ -329,18 +319,14 @@ Drop an index from the table.
|
||||
|
||||
* **name**: `string`
|
||||
The name of the index.
|
||||
This does not delete the index from disk, it just removes it from the table.
|
||||
To delete the index, run [Table#optimize](Table.md#optimize) after dropping the index.
|
||||
Use [Table.listIndices](Table.md#listindices) to find the names of the indices.
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<`void`>
|
||||
|
||||
#### Note
|
||||
|
||||
This does not delete the index from disk, it just removes it from the table.
|
||||
To delete the index, run [Table#optimize](Table.md#optimize) after dropping the index.
|
||||
|
||||
Use [Table.listIndices](Table.md#listindices) to find the names of the indices.
|
||||
|
||||
***
|
||||
|
||||
### indexStats()
|
||||
@@ -404,7 +390,7 @@ List all the versions of the table
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<`Version`[]>
|
||||
`Promise`<[`Version`](../interfaces/Version.md)[]>
|
||||
|
||||
***
|
||||
|
||||
@@ -420,7 +406,7 @@ abstract mergeInsert(on): MergeInsertBuilder
|
||||
|
||||
#### Returns
|
||||
|
||||
`MergeInsertBuilder`
|
||||
[`MergeInsertBuilder`](MergeInsertBuilder.md)
|
||||
|
||||
***
|
||||
|
||||
@@ -464,7 +450,7 @@ Modeled after ``VACUUM`` in PostgreSQL.
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<`OptimizeStats`>
|
||||
`Promise`<[`OptimizeStats`](../interfaces/OptimizeStats.md)>
|
||||
|
||||
***
|
||||
|
||||
@@ -581,7 +567,7 @@ Get the schema of the table.
|
||||
abstract search(
|
||||
query,
|
||||
queryType?,
|
||||
ftsColumns?): VectorQuery | Query
|
||||
ftsColumns?): Query | VectorQuery
|
||||
```
|
||||
|
||||
Create a search query to find the nearest neighbors
|
||||
@@ -589,7 +575,7 @@ of the given query
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **query**: `string` \| `IntoVector`
|
||||
* **query**: `string` \| [`IntoVector`](../type-aliases/IntoVector.md)
|
||||
the query, a vector or string
|
||||
|
||||
* **queryType?**: `string`
|
||||
@@ -603,7 +589,7 @@ of the given query
|
||||
|
||||
#### Returns
|
||||
|
||||
[`VectorQuery`](VectorQuery.md) \| [`Query`](Query.md)
|
||||
[`Query`](Query.md) \| [`VectorQuery`](VectorQuery.md)
|
||||
|
||||
***
|
||||
|
||||
@@ -722,7 +708,7 @@ by `query`.
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **vector**: `IntoVector`
|
||||
* **vector**: [`IntoVector`](../type-aliases/IntoVector.md)
|
||||
|
||||
#### Returns
|
||||
|
||||
@@ -745,38 +731,3 @@ Retrieve the version of the table
|
||||
#### Returns
|
||||
|
||||
`Promise`<`number`>
|
||||
|
||||
***
|
||||
|
||||
### parseTableData()
|
||||
|
||||
```ts
|
||||
static parseTableData(
|
||||
data,
|
||||
options?,
|
||||
streaming?): Promise<object>
|
||||
```
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **data**: `TableLike` \| `Record`<`string`, `unknown`>[]
|
||||
|
||||
* **options?**: `Partial`<[`CreateTableOptions`](../interfaces/CreateTableOptions.md)>
|
||||
|
||||
* **streaming?**: `boolean` = `false`
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<`object`>
|
||||
|
||||
##### buf
|
||||
|
||||
```ts
|
||||
buf: Buffer;
|
||||
```
|
||||
|
||||
##### mode
|
||||
|
||||
```ts
|
||||
mode: string;
|
||||
```
|
||||
|
||||
@@ -10,30 +10,14 @@ A builder used to construct a vector search
|
||||
|
||||
This builder can be reused to execute the query many times.
|
||||
|
||||
## See
|
||||
|
||||
[Query#nearestTo](Query.md#nearestto)
|
||||
|
||||
## Extends
|
||||
|
||||
- [`QueryBase`](QueryBase.md)<`NativeVectorQuery`>
|
||||
|
||||
## Constructors
|
||||
|
||||
### new VectorQuery()
|
||||
|
||||
```ts
|
||||
new VectorQuery(inner): VectorQuery
|
||||
```
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **inner**: `VectorQuery` \| `Promise`<`VectorQuery`>
|
||||
|
||||
#### Returns
|
||||
|
||||
[`VectorQuery`](VectorQuery.md)
|
||||
|
||||
#### Overrides
|
||||
|
||||
[`QueryBase`](QueryBase.md).[`constructor`](QueryBase.md#constructors)
|
||||
|
||||
## Properties
|
||||
|
||||
### inner
|
||||
@@ -48,22 +32,6 @@ protected inner: VectorQuery | Promise<VectorQuery>;
|
||||
|
||||
## Methods
|
||||
|
||||
### \[asyncIterator\]()
|
||||
|
||||
```ts
|
||||
asyncIterator: AsyncIterator<RecordBatch<any>, any, undefined>
|
||||
```
|
||||
|
||||
#### Returns
|
||||
|
||||
`AsyncIterator`<`RecordBatch`<`any`>, `any`, `undefined`>
|
||||
|
||||
#### Inherited from
|
||||
|
||||
[`QueryBase`](QueryBase.md).[`[asyncIterator]`](QueryBase.md#%5Basynciterator%5D)
|
||||
|
||||
***
|
||||
|
||||
### addQueryVector()
|
||||
|
||||
```ts
|
||||
@@ -72,7 +40,7 @@ addQueryVector(vector): VectorQuery
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **vector**: `IntoVector`
|
||||
* **vector**: [`IntoVector`](../type-aliases/IntoVector.md)
|
||||
|
||||
#### Returns
|
||||
|
||||
@@ -179,26 +147,6 @@ By default "l2" is used.
|
||||
|
||||
***
|
||||
|
||||
### doCall()
|
||||
|
||||
```ts
|
||||
protected doCall(fn): void
|
||||
```
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **fn**
|
||||
|
||||
#### Returns
|
||||
|
||||
`void`
|
||||
|
||||
#### Inherited from
|
||||
|
||||
[`QueryBase`](QueryBase.md).[`doCall`](QueryBase.md#docall)
|
||||
|
||||
***
|
||||
|
||||
### ef()
|
||||
|
||||
```ts
|
||||
@@ -233,7 +181,7 @@ Execute the query and return the results as an
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **options?**: `Partial`<`QueryExecutionOptions`>
|
||||
* **options?**: `Partial`<[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)>
|
||||
|
||||
#### Returns
|
||||
|
||||
@@ -302,7 +250,7 @@ fastSearch(): this
|
||||
Skip searching un-indexed data. This can make search faster, but will miss
|
||||
any data that is not yet indexed.
|
||||
|
||||
Use lancedb.Table#optimize to index all un-indexed data.
|
||||
Use [Table#optimize](Table.md#optimize) to index all un-indexed data.
|
||||
|
||||
#### Returns
|
||||
|
||||
@@ -330,7 +278,7 @@ A filter statement to be applied to this query.
|
||||
|
||||
`this`
|
||||
|
||||
#### Alias
|
||||
#### See
|
||||
|
||||
where
|
||||
|
||||
@@ -354,7 +302,7 @@ fullTextSearch(query, options?): this
|
||||
|
||||
* **query**: `string`
|
||||
|
||||
* **options?**: `Partial`<`FullTextSearchOptions`>
|
||||
* **options?**: `Partial`<[`FullTextSearchOptions`](../interfaces/FullTextSearchOptions.md)>
|
||||
|
||||
#### Returns
|
||||
|
||||
@@ -391,26 +339,6 @@ called then every valid row from the table will be returned.
|
||||
|
||||
***
|
||||
|
||||
### nativeExecute()
|
||||
|
||||
```ts
|
||||
protected nativeExecute(options?): Promise<RecordBatchIterator>
|
||||
```
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **options?**: `Partial`<`QueryExecutionOptions`>
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<`RecordBatchIterator`>
|
||||
|
||||
#### Inherited from
|
||||
|
||||
[`QueryBase`](QueryBase.md).[`nativeExecute`](QueryBase.md#nativeexecute)
|
||||
|
||||
***
|
||||
|
||||
### nprobes()
|
||||
|
||||
```ts
|
||||
@@ -625,7 +553,7 @@ Collect the results as an array of objects.
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **options?**: `Partial`<`QueryExecutionOptions`>
|
||||
* **options?**: `Partial`<[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)>
|
||||
|
||||
#### Returns
|
||||
|
||||
@@ -647,7 +575,7 @@ Collect the results as an Arrow
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **options?**: `Partial`<`QueryExecutionOptions`>
|
||||
* **options?**: `Partial`<[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)>
|
||||
|
||||
#### Returns
|
||||
|
||||
|
||||
@@ -1,33 +0,0 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / WriteMode
|
||||
|
||||
# Enumeration: WriteMode
|
||||
|
||||
Write mode for writing a table.
|
||||
|
||||
## Enumeration Members
|
||||
|
||||
### Append
|
||||
|
||||
```ts
|
||||
Append: "Append";
|
||||
```
|
||||
|
||||
***
|
||||
|
||||
### Create
|
||||
|
||||
```ts
|
||||
Create: "Create";
|
||||
```
|
||||
|
||||
***
|
||||
|
||||
### Overwrite
|
||||
|
||||
```ts
|
||||
Overwrite: "Overwrite";
|
||||
```
|
||||
@@ -6,10 +6,10 @@
|
||||
|
||||
# Function: connect()
|
||||
|
||||
## connect(uri, opts)
|
||||
## connect(uri, options)
|
||||
|
||||
```ts
|
||||
function connect(uri, opts?): Promise<Connection>
|
||||
function connect(uri, options?): Promise<Connection>
|
||||
```
|
||||
|
||||
Connect to a LanceDB instance at the given URI.
|
||||
@@ -26,7 +26,8 @@ Accepted formats:
|
||||
The uri of the database. If the database uri starts
|
||||
with `db://` then it connects to a remote database.
|
||||
|
||||
* **opts?**: `Partial`<[`ConnectionOptions`](../interfaces/ConnectionOptions.md)>
|
||||
* **options?**: `Partial`<[`ConnectionOptions`](../interfaces/ConnectionOptions.md)>
|
||||
The options to use when connecting to the database
|
||||
|
||||
### Returns
|
||||
|
||||
@@ -49,10 +50,10 @@ const conn = await connect(
|
||||
});
|
||||
```
|
||||
|
||||
## connect(opts)
|
||||
## connect(options)
|
||||
|
||||
```ts
|
||||
function connect(opts): Promise<Connection>
|
||||
function connect(options): Promise<Connection>
|
||||
```
|
||||
|
||||
Connect to a LanceDB instance at the given URI.
|
||||
@@ -65,7 +66,8 @@ Accepted formats:
|
||||
|
||||
### Parameters
|
||||
|
||||
* **opts**: `Partial`<[`ConnectionOptions`](../interfaces/ConnectionOptions.md)> & `object`
|
||||
* **options**: `Partial`<[`ConnectionOptions`](../interfaces/ConnectionOptions.md)> & `object`
|
||||
The options to use when connecting to the database
|
||||
|
||||
### Returns
|
||||
|
||||
|
||||
@@ -22,8 +22,6 @@ when creating a table or adding data to it)
|
||||
This function converts an array of Record<String, any> (row-major JS objects)
|
||||
to an Arrow Table (a columnar structure)
|
||||
|
||||
Note that it currently does not support nulls.
|
||||
|
||||
If a schema is provided then it will be used to determine the resulting array
|
||||
types. Fields will also be reordered to fit the order defined by the schema.
|
||||
|
||||
@@ -31,6 +29,9 @@ If a schema is not provided then the types will be inferred and the field order
|
||||
will be controlled by the order of properties in the first record. If a type
|
||||
is inferred it will always be nullable.
|
||||
|
||||
If not all fields are found in the data, then a subset of the schema will be
|
||||
returned.
|
||||
|
||||
If the input is empty then a schema must be provided to create an empty table.
|
||||
|
||||
When a schema is not specified then data types will be inferred. The inference
|
||||
@@ -38,6 +39,7 @@ rules are as follows:
|
||||
|
||||
- boolean => Bool
|
||||
- number => Float64
|
||||
- bigint => Int64
|
||||
- String => Utf8
|
||||
- Buffer => Binary
|
||||
- Record<String, any> => Struct
|
||||
@@ -57,6 +59,7 @@ rules are as follows:
|
||||
|
||||
## Example
|
||||
|
||||
```ts
|
||||
import { fromTableToBuffer, makeArrowTable } from "../arrow";
|
||||
import { Field, FixedSizeList, Float16, Float32, Int32, Schema } from "apache-arrow";
|
||||
|
||||
@@ -78,7 +81,6 @@ The `vectorColumns` option can be used to support other vector column
|
||||
names and data types.
|
||||
|
||||
```ts
|
||||
|
||||
const schema = new Schema([
|
||||
new Field("a", new Float64()),
|
||||
new Field("b", new Float64()),
|
||||
@@ -97,8 +99,7 @@ const schema = new Schema([
|
||||
|
||||
You can specify the vector column types and names using the options as well
|
||||
|
||||
```typescript
|
||||
|
||||
```ts
|
||||
const schema = new Schema([
|
||||
new Field('a', new Float64()),
|
||||
new Field('b', new Float64()),
|
||||
|
||||
@@ -9,15 +9,12 @@
|
||||
- [embedding](namespaces/embedding/README.md)
|
||||
- [rerankers](namespaces/rerankers/README.md)
|
||||
|
||||
## Enumerations
|
||||
|
||||
- [WriteMode](enumerations/WriteMode.md)
|
||||
|
||||
## Classes
|
||||
|
||||
- [Connection](classes/Connection.md)
|
||||
- [Index](classes/Index.md)
|
||||
- [MakeArrowTableOptions](classes/MakeArrowTableOptions.md)
|
||||
- [MergeInsertBuilder](classes/MergeInsertBuilder.md)
|
||||
- [Query](classes/Query.md)
|
||||
- [QueryBase](classes/QueryBase.md)
|
||||
- [RecordBatchIterator](classes/RecordBatchIterator.md)
|
||||
@@ -31,23 +28,39 @@
|
||||
- [AddDataOptions](interfaces/AddDataOptions.md)
|
||||
- [ClientConfig](interfaces/ClientConfig.md)
|
||||
- [ColumnAlteration](interfaces/ColumnAlteration.md)
|
||||
- [CompactionStats](interfaces/CompactionStats.md)
|
||||
- [ConnectionOptions](interfaces/ConnectionOptions.md)
|
||||
- [CreateTableOptions](interfaces/CreateTableOptions.md)
|
||||
- [ExecutableQuery](interfaces/ExecutableQuery.md)
|
||||
- [FtsOptions](interfaces/FtsOptions.md)
|
||||
- [FullTextSearchOptions](interfaces/FullTextSearchOptions.md)
|
||||
- [HnswPqOptions](interfaces/HnswPqOptions.md)
|
||||
- [HnswSqOptions](interfaces/HnswSqOptions.md)
|
||||
- [IndexConfig](interfaces/IndexConfig.md)
|
||||
- [IndexOptions](interfaces/IndexOptions.md)
|
||||
- [IndexStatistics](interfaces/IndexStatistics.md)
|
||||
- [IvfPqOptions](interfaces/IvfPqOptions.md)
|
||||
- [OpenTableOptions](interfaces/OpenTableOptions.md)
|
||||
- [OptimizeOptions](interfaces/OptimizeOptions.md)
|
||||
- [OptimizeStats](interfaces/OptimizeStats.md)
|
||||
- [QueryExecutionOptions](interfaces/QueryExecutionOptions.md)
|
||||
- [RemovalStats](interfaces/RemovalStats.md)
|
||||
- [RetryConfig](interfaces/RetryConfig.md)
|
||||
- [TableNamesOptions](interfaces/TableNamesOptions.md)
|
||||
- [TimeoutConfig](interfaces/TimeoutConfig.md)
|
||||
- [UpdateOptions](interfaces/UpdateOptions.md)
|
||||
- [WriteOptions](interfaces/WriteOptions.md)
|
||||
- [Version](interfaces/Version.md)
|
||||
|
||||
## Type Aliases
|
||||
|
||||
- [Data](type-aliases/Data.md)
|
||||
- [DataLike](type-aliases/DataLike.md)
|
||||
- [FieldLike](type-aliases/FieldLike.md)
|
||||
- [IntoSql](type-aliases/IntoSql.md)
|
||||
- [IntoVector](type-aliases/IntoVector.md)
|
||||
- [RecordBatchLike](type-aliases/RecordBatchLike.md)
|
||||
- [SchemaLike](type-aliases/SchemaLike.md)
|
||||
- [TableLike](type-aliases/TableLike.md)
|
||||
|
||||
## Functions
|
||||
|
||||
|
||||
@@ -8,6 +8,14 @@
|
||||
|
||||
## Properties
|
||||
|
||||
### extraHeaders?
|
||||
|
||||
```ts
|
||||
optional extraHeaders: Record<string, string>;
|
||||
```
|
||||
|
||||
***
|
||||
|
||||
### retryConfig?
|
||||
|
||||
```ts
|
||||
|
||||
49
docs/src/js/interfaces/CompactionStats.md
Normal file
49
docs/src/js/interfaces/CompactionStats.md
Normal file
@@ -0,0 +1,49 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / CompactionStats
|
||||
|
||||
# Interface: CompactionStats
|
||||
|
||||
Statistics about a compaction operation.
|
||||
|
||||
## Properties
|
||||
|
||||
### filesAdded
|
||||
|
||||
```ts
|
||||
filesAdded: number;
|
||||
```
|
||||
|
||||
The number of new, compacted data files added
|
||||
|
||||
***
|
||||
|
||||
### filesRemoved
|
||||
|
||||
```ts
|
||||
filesRemoved: number;
|
||||
```
|
||||
|
||||
The number of data files removed
|
||||
|
||||
***
|
||||
|
||||
### fragmentsAdded
|
||||
|
||||
```ts
|
||||
fragmentsAdded: number;
|
||||
```
|
||||
|
||||
The number of new, compacted fragments added
|
||||
|
||||
***
|
||||
|
||||
### fragmentsRemoved
|
||||
|
||||
```ts
|
||||
fragmentsRemoved: number;
|
||||
```
|
||||
|
||||
The number of fragments removed
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
## Properties
|
||||
|
||||
### dataStorageVersion?
|
||||
### ~~dataStorageVersion?~~
|
||||
|
||||
```ts
|
||||
optional dataStorageVersion: string;
|
||||
@@ -19,6 +19,10 @@ The version of the data storage format to use.
|
||||
The default is `stable`.
|
||||
Set to "legacy" to use the old format.
|
||||
|
||||
#### Deprecated
|
||||
|
||||
Pass `new_table_data_storage_version` to storageOptions instead.
|
||||
|
||||
***
|
||||
|
||||
### embeddingFunction?
|
||||
@@ -29,7 +33,7 @@ optional embeddingFunction: EmbeddingFunctionConfig;
|
||||
|
||||
***
|
||||
|
||||
### enableV2ManifestPaths?
|
||||
### ~~enableV2ManifestPaths?~~
|
||||
|
||||
```ts
|
||||
optional enableV2ManifestPaths: boolean;
|
||||
@@ -41,6 +45,10 @@ turning this on will make the dataset unreadable for older versions
|
||||
of LanceDB (prior to 0.10.0). To migrate an existing dataset, instead
|
||||
use the LocalTable#migrateManifestPathsV2 method.
|
||||
|
||||
#### Deprecated
|
||||
|
||||
Pass `new_table_enable_v2_manifest_paths` to storageOptions instead.
|
||||
|
||||
***
|
||||
|
||||
### existOk
|
||||
@@ -90,17 +98,3 @@ Options already set on the connection will be inherited by the table,
|
||||
but can be overridden here.
|
||||
|
||||
The available options are described at https://lancedb.github.io/lancedb/guides/storage/
|
||||
|
||||
***
|
||||
|
||||
### useLegacyFormat?
|
||||
|
||||
```ts
|
||||
optional useLegacyFormat: boolean;
|
||||
```
|
||||
|
||||
If true then data files will be written with the legacy format
|
||||
|
||||
The default is false.
|
||||
|
||||
Deprecated. Use data storage version instead.
|
||||
|
||||
103
docs/src/js/interfaces/FtsOptions.md
Normal file
103
docs/src/js/interfaces/FtsOptions.md
Normal file
@@ -0,0 +1,103 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / FtsOptions
|
||||
|
||||
# Interface: FtsOptions
|
||||
|
||||
Options to create a full text search index
|
||||
|
||||
## Properties
|
||||
|
||||
### asciiFolding?
|
||||
|
||||
```ts
|
||||
optional asciiFolding: boolean;
|
||||
```
|
||||
|
||||
whether to remove punctuation
|
||||
|
||||
***
|
||||
|
||||
### baseTokenizer?
|
||||
|
||||
```ts
|
||||
optional baseTokenizer: "raw" | "simple" | "whitespace";
|
||||
```
|
||||
|
||||
The tokenizer to use when building the index.
|
||||
The default is "simple".
|
||||
|
||||
The following tokenizers are available:
|
||||
|
||||
"simple" - Simple tokenizer. This tokenizer splits the text into tokens using whitespace and punctuation as a delimiter.
|
||||
|
||||
"whitespace" - Whitespace tokenizer. This tokenizer splits the text into tokens using whitespace as a delimiter.
|
||||
|
||||
"raw" - Raw tokenizer. This tokenizer does not split the text into tokens and indexes the entire text as a single token.
|
||||
|
||||
***
|
||||
|
||||
### language?
|
||||
|
||||
```ts
|
||||
optional language: string;
|
||||
```
|
||||
|
||||
language for stemming and stop words
|
||||
this is only used when `stem` or `remove_stop_words` is true
|
||||
|
||||
***
|
||||
|
||||
### lowercase?
|
||||
|
||||
```ts
|
||||
optional lowercase: boolean;
|
||||
```
|
||||
|
||||
whether to lowercase tokens
|
||||
|
||||
***
|
||||
|
||||
### maxTokenLength?
|
||||
|
||||
```ts
|
||||
optional maxTokenLength: number;
|
||||
```
|
||||
|
||||
maximum token length
|
||||
tokens longer than this length will be ignored
|
||||
|
||||
***
|
||||
|
||||
### removeStopWords?
|
||||
|
||||
```ts
|
||||
optional removeStopWords: boolean;
|
||||
```
|
||||
|
||||
whether to remove stop words
|
||||
|
||||
***
|
||||
|
||||
### stem?
|
||||
|
||||
```ts
|
||||
optional stem: boolean;
|
||||
```
|
||||
|
||||
whether to stem tokens
|
||||
|
||||
***
|
||||
|
||||
### withPosition?
|
||||
|
||||
```ts
|
||||
optional withPosition: boolean;
|
||||
```
|
||||
|
||||
Whether to build the index with positions.
|
||||
True by default.
|
||||
If set to false, the index will not store the positions of the tokens in the text,
|
||||
which will make the index smaller and faster to build, but will not support phrase queries.
|
||||
22
docs/src/js/interfaces/FullTextSearchOptions.md
Normal file
22
docs/src/js/interfaces/FullTextSearchOptions.md
Normal file
@@ -0,0 +1,22 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / FullTextSearchOptions
|
||||
|
||||
# Interface: FullTextSearchOptions
|
||||
|
||||
Options that control the behavior of a full text search
|
||||
|
||||
## Properties
|
||||
|
||||
### columns?
|
||||
|
||||
```ts
|
||||
optional columns: string | string[];
|
||||
```
|
||||
|
||||
The columns to search
|
||||
|
||||
If not specified, all indexed columns will be searched.
|
||||
For now, only one column can be searched.
|
||||
149
docs/src/js/interfaces/HnswPqOptions.md
Normal file
149
docs/src/js/interfaces/HnswPqOptions.md
Normal file
@@ -0,0 +1,149 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / HnswPqOptions
|
||||
|
||||
# Interface: HnswPqOptions
|
||||
|
||||
Options to create an `HNSW_PQ` index
|
||||
|
||||
## Properties
|
||||
|
||||
### distanceType?
|
||||
|
||||
```ts
|
||||
optional distanceType: "l2" | "cosine" | "dot";
|
||||
```
|
||||
|
||||
The distance metric used to train the index.
|
||||
|
||||
Default value is "l2".
|
||||
|
||||
The following distance types are available:
|
||||
|
||||
"l2" - Euclidean distance. This is a very common distance metric that
|
||||
accounts for both magnitude and direction when determining the distance
|
||||
between vectors. L2 distance has a range of [0, ∞).
|
||||
|
||||
"cosine" - Cosine distance. Cosine distance is a distance metric
|
||||
calculated from the cosine similarity between two vectors. Cosine
|
||||
similarity is a measure of similarity between two non-zero vectors of an
|
||||
inner product space. It is defined to equal the cosine of the angle
|
||||
between them. Unlike L2, the cosine distance is not affected by the
|
||||
magnitude of the vectors. Cosine distance has a range of [0, 2].
|
||||
|
||||
"dot" - Dot product. Dot distance is the dot product of two vectors. Dot
|
||||
distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their
|
||||
L2 norm is 1), then dot distance is equivalent to the cosine distance.
|
||||
|
||||
***
|
||||
|
||||
### efConstruction?
|
||||
|
||||
```ts
|
||||
optional efConstruction: number;
|
||||
```
|
||||
|
||||
The number of candidates to evaluate during the construction of the HNSW graph.
|
||||
|
||||
The default value is 300.
|
||||
|
||||
This value controls the tradeoff between build speed and accuracy.
|
||||
The higher the value the more accurate the build but the slower it will be.
|
||||
150 to 300 is the typical range. 100 is a minimum for good quality search
|
||||
results. In most cases, there is no benefit to setting this higher than 500.
|
||||
This value should be set to a value that is not less than `ef` in the search phase.
|
||||
|
||||
***
|
||||
|
||||
### m?
|
||||
|
||||
```ts
|
||||
optional m: number;
|
||||
```
|
||||
|
||||
The number of neighbors to select for each vector in the HNSW graph.
|
||||
|
||||
The default value is 20.
|
||||
|
||||
This value controls the tradeoff between search speed and accuracy.
|
||||
The higher the value the more accurate the search but the slower it will be.
|
||||
|
||||
***
|
||||
|
||||
### maxIterations?
|
||||
|
||||
```ts
|
||||
optional maxIterations: number;
|
||||
```
|
||||
|
||||
Max iterations to train kmeans.
|
||||
|
||||
The default value is 50.
|
||||
|
||||
When training an IVF index we use kmeans to calculate the partitions. This parameter
|
||||
controls how many iterations of kmeans to run.
|
||||
|
||||
Increasing this might improve the quality of the index but in most cases the parameter
|
||||
is unused because kmeans will converge with fewer iterations. The parameter is only
|
||||
used in cases where kmeans does not appear to converge. In those cases it is unlikely
|
||||
that setting this larger will lead to the index converging anyways.
|
||||
|
||||
***
|
||||
|
||||
### numPartitions?
|
||||
|
||||
```ts
|
||||
optional numPartitions: number;
|
||||
```
|
||||
|
||||
The number of IVF partitions to create.
|
||||
|
||||
For HNSW, we recommend a small number of partitions. Setting this to 1 works
|
||||
well for most tables. For very large tables, training just one HNSW graph
|
||||
will require too much memory. Each partition becomes its own HNSW graph, so
|
||||
setting this value higher reduces the peak memory use of training.
|
||||
|
||||
***
|
||||
|
||||
### numSubVectors?
|
||||
|
||||
```ts
|
||||
optional numSubVectors: number;
|
||||
```
|
||||
|
||||
Number of sub-vectors of PQ.
|
||||
|
||||
This value controls how much the vector is compressed during the quantization step.
|
||||
The more sub vectors there are the less the vector is compressed. The default is
|
||||
the dimension of the vector divided by 16. If the dimension is not evenly divisible
|
||||
by 16 we use the dimension divded by 8.
|
||||
|
||||
The above two cases are highly preferred. Having 8 or 16 values per subvector allows
|
||||
us to use efficient SIMD instructions.
|
||||
|
||||
If the dimension is not visible by 8 then we use 1 subvector. This is not ideal and
|
||||
will likely result in poor performance.
|
||||
|
||||
***
|
||||
|
||||
### sampleRate?
|
||||
|
||||
```ts
|
||||
optional sampleRate: number;
|
||||
```
|
||||
|
||||
The rate used to calculate the number of training vectors for kmeans.
|
||||
|
||||
Default value is 256.
|
||||
|
||||
When an IVF index is trained, we need to calculate partitions. These are groups
|
||||
of vectors that are similar to each other. To do this we use an algorithm called kmeans.
|
||||
|
||||
Running kmeans on a large dataset can be slow. To speed this up we run kmeans on a
|
||||
random sample of the data. This parameter controls the size of the sample. The total
|
||||
number of vectors used to train the index is `sample_rate * num_partitions`.
|
||||
|
||||
Increasing this value might improve the quality of the index but in most cases the
|
||||
default should be sufficient.
|
||||
128
docs/src/js/interfaces/HnswSqOptions.md
Normal file
128
docs/src/js/interfaces/HnswSqOptions.md
Normal file
@@ -0,0 +1,128 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / HnswSqOptions
|
||||
|
||||
# Interface: HnswSqOptions
|
||||
|
||||
Options to create an `HNSW_SQ` index
|
||||
|
||||
## Properties
|
||||
|
||||
### distanceType?
|
||||
|
||||
```ts
|
||||
optional distanceType: "l2" | "cosine" | "dot";
|
||||
```
|
||||
|
||||
The distance metric used to train the index.
|
||||
|
||||
Default value is "l2".
|
||||
|
||||
The following distance types are available:
|
||||
|
||||
"l2" - Euclidean distance. This is a very common distance metric that
|
||||
accounts for both magnitude and direction when determining the distance
|
||||
between vectors. L2 distance has a range of [0, ∞).
|
||||
|
||||
"cosine" - Cosine distance. Cosine distance is a distance metric
|
||||
calculated from the cosine similarity between two vectors. Cosine
|
||||
similarity is a measure of similarity between two non-zero vectors of an
|
||||
inner product space. It is defined to equal the cosine of the angle
|
||||
between them. Unlike L2, the cosine distance is not affected by the
|
||||
magnitude of the vectors. Cosine distance has a range of [0, 2].
|
||||
|
||||
"dot" - Dot product. Dot distance is the dot product of two vectors. Dot
|
||||
distance has a range of (-∞, ∞). If the vectors are normalized (i.e. their
|
||||
L2 norm is 1), then dot distance is equivalent to the cosine distance.
|
||||
|
||||
***
|
||||
|
||||
### efConstruction?
|
||||
|
||||
```ts
|
||||
optional efConstruction: number;
|
||||
```
|
||||
|
||||
The number of candidates to evaluate during the construction of the HNSW graph.
|
||||
|
||||
The default value is 300.
|
||||
|
||||
This value controls the tradeoff between build speed and accuracy.
|
||||
The higher the value the more accurate the build but the slower it will be.
|
||||
150 to 300 is the typical range. 100 is a minimum for good quality search
|
||||
results. In most cases, there is no benefit to setting this higher than 500.
|
||||
This value should be set to a value that is not less than `ef` in the search phase.
|
||||
|
||||
***
|
||||
|
||||
### m?
|
||||
|
||||
```ts
|
||||
optional m: number;
|
||||
```
|
||||
|
||||
The number of neighbors to select for each vector in the HNSW graph.
|
||||
|
||||
The default value is 20.
|
||||
|
||||
This value controls the tradeoff between search speed and accuracy.
|
||||
The higher the value the more accurate the search but the slower it will be.
|
||||
|
||||
***
|
||||
|
||||
### maxIterations?
|
||||
|
||||
```ts
|
||||
optional maxIterations: number;
|
||||
```
|
||||
|
||||
Max iterations to train kmeans.
|
||||
|
||||
The default value is 50.
|
||||
|
||||
When training an IVF index we use kmeans to calculate the partitions. This parameter
|
||||
controls how many iterations of kmeans to run.
|
||||
|
||||
Increasing this might improve the quality of the index but in most cases the parameter
|
||||
is unused because kmeans will converge with fewer iterations. The parameter is only
|
||||
used in cases where kmeans does not appear to converge. In those cases it is unlikely
|
||||
that setting this larger will lead to the index converging anyways.
|
||||
|
||||
***
|
||||
|
||||
### numPartitions?
|
||||
|
||||
```ts
|
||||
optional numPartitions: number;
|
||||
```
|
||||
|
||||
The number of IVF partitions to create.
|
||||
|
||||
For HNSW, we recommend a small number of partitions. Setting this to 1 works
|
||||
well for most tables. For very large tables, training just one HNSW graph
|
||||
will require too much memory. Each partition becomes its own HNSW graph, so
|
||||
setting this value higher reduces the peak memory use of training.
|
||||
|
||||
***
|
||||
|
||||
### sampleRate?
|
||||
|
||||
```ts
|
||||
optional sampleRate: number;
|
||||
```
|
||||
|
||||
The rate used to calculate the number of training vectors for kmeans.
|
||||
|
||||
Default value is 256.
|
||||
|
||||
When an IVF index is trained, we need to calculate partitions. These are groups
|
||||
of vectors that are similar to each other. To do this we use an algorithm called kmeans.
|
||||
|
||||
Running kmeans on a large dataset can be slow. To speed this up we run kmeans on a
|
||||
random sample of the data. This parameter controls the size of the sample. The total
|
||||
number of vectors used to train the index is `sample_rate * num_partitions`.
|
||||
|
||||
Increasing this value might improve the quality of the index but in most cases the
|
||||
default should be sufficient.
|
||||
40
docs/src/js/interfaces/OpenTableOptions.md
Normal file
40
docs/src/js/interfaces/OpenTableOptions.md
Normal file
@@ -0,0 +1,40 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / OpenTableOptions
|
||||
|
||||
# Interface: OpenTableOptions
|
||||
|
||||
## Properties
|
||||
|
||||
### indexCacheSize?
|
||||
|
||||
```ts
|
||||
optional indexCacheSize: number;
|
||||
```
|
||||
|
||||
Set the size of the index cache, specified as a number of entries
|
||||
|
||||
The exact meaning of an "entry" will depend on the type of index:
|
||||
- IVF: there is one entry for each IVF partition
|
||||
- BTREE: there is one entry for the entire index
|
||||
|
||||
This cache applies to the entire opened table, across all indices.
|
||||
Setting this value higher will increase performance on larger datasets
|
||||
at the expense of more RAM
|
||||
|
||||
***
|
||||
|
||||
### storageOptions?
|
||||
|
||||
```ts
|
||||
optional storageOptions: Record<string, string>;
|
||||
```
|
||||
|
||||
Configuration for object storage.
|
||||
|
||||
Options already set on the connection will be inherited by the table,
|
||||
but can be overridden here.
|
||||
|
||||
The available options are described at https://lancedb.github.io/lancedb/guides/storage/
|
||||
29
docs/src/js/interfaces/OptimizeStats.md
Normal file
29
docs/src/js/interfaces/OptimizeStats.md
Normal file
@@ -0,0 +1,29 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / OptimizeStats
|
||||
|
||||
# Interface: OptimizeStats
|
||||
|
||||
Statistics about an optimize operation
|
||||
|
||||
## Properties
|
||||
|
||||
### compaction
|
||||
|
||||
```ts
|
||||
compaction: CompactionStats;
|
||||
```
|
||||
|
||||
Statistics about the compaction operation
|
||||
|
||||
***
|
||||
|
||||
### prune
|
||||
|
||||
```ts
|
||||
prune: RemovalStats;
|
||||
```
|
||||
|
||||
Statistics about the removal operation
|
||||
22
docs/src/js/interfaces/QueryExecutionOptions.md
Normal file
22
docs/src/js/interfaces/QueryExecutionOptions.md
Normal file
@@ -0,0 +1,22 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / QueryExecutionOptions
|
||||
|
||||
# Interface: QueryExecutionOptions
|
||||
|
||||
Options that control the behavior of a particular query execution
|
||||
|
||||
## Properties
|
||||
|
||||
### maxBatchLength?
|
||||
|
||||
```ts
|
||||
optional maxBatchLength: number;
|
||||
```
|
||||
|
||||
The maximum number of rows to return in a single batch
|
||||
|
||||
Batches may have fewer rows if the underlying data is stored
|
||||
in smaller chunks.
|
||||
29
docs/src/js/interfaces/RemovalStats.md
Normal file
29
docs/src/js/interfaces/RemovalStats.md
Normal file
@@ -0,0 +1,29 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / RemovalStats
|
||||
|
||||
# Interface: RemovalStats
|
||||
|
||||
Statistics about a cleanup operation
|
||||
|
||||
## Properties
|
||||
|
||||
### bytesRemoved
|
||||
|
||||
```ts
|
||||
bytesRemoved: number;
|
||||
```
|
||||
|
||||
The number of bytes removed
|
||||
|
||||
***
|
||||
|
||||
### oldVersionsRemoved
|
||||
|
||||
```ts
|
||||
oldVersionsRemoved: number;
|
||||
```
|
||||
|
||||
The number of old versions removed
|
||||
31
docs/src/js/interfaces/Version.md
Normal file
31
docs/src/js/interfaces/Version.md
Normal file
@@ -0,0 +1,31 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / Version
|
||||
|
||||
# Interface: Version
|
||||
|
||||
## Properties
|
||||
|
||||
### metadata
|
||||
|
||||
```ts
|
||||
metadata: Record<string, string>;
|
||||
```
|
||||
|
||||
***
|
||||
|
||||
### timestamp
|
||||
|
||||
```ts
|
||||
timestamp: Date;
|
||||
```
|
||||
|
||||
***
|
||||
|
||||
### version
|
||||
|
||||
```ts
|
||||
version: number;
|
||||
```
|
||||
@@ -1,19 +0,0 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / WriteOptions
|
||||
|
||||
# Interface: WriteOptions
|
||||
|
||||
Write options when creating a Table.
|
||||
|
||||
## Properties
|
||||
|
||||
### mode?
|
||||
|
||||
```ts
|
||||
optional mode: WriteMode;
|
||||
```
|
||||
|
||||
Write mode for writing to a table.
|
||||
@@ -17,6 +17,14 @@
|
||||
### Interfaces
|
||||
|
||||
- [EmbeddingFunctionConfig](interfaces/EmbeddingFunctionConfig.md)
|
||||
- [EmbeddingFunctionConstructor](interfaces/EmbeddingFunctionConstructor.md)
|
||||
- [EmbeddingFunctionCreate](interfaces/EmbeddingFunctionCreate.md)
|
||||
- [FieldOptions](interfaces/FieldOptions.md)
|
||||
- [FunctionOptions](interfaces/FunctionOptions.md)
|
||||
|
||||
### Type Aliases
|
||||
|
||||
- [CreateReturnType](type-aliases/CreateReturnType.md)
|
||||
|
||||
### Functions
|
||||
|
||||
|
||||
@@ -8,6 +8,23 @@
|
||||
|
||||
An embedding function that automatically creates vector representation for a given column.
|
||||
|
||||
It's important subclasses pass the **original** options to the super constructor
|
||||
and then pass those options to `resolveVariables` to resolve any variables before
|
||||
using them.
|
||||
|
||||
## Example
|
||||
|
||||
```ts
|
||||
class MyEmbeddingFunction extends EmbeddingFunction {
|
||||
constructor(options: {model: string, timeout: number}) {
|
||||
super(optionsRaw);
|
||||
const options = this.resolveVariables(optionsRaw);
|
||||
this.model = options.model;
|
||||
this.timeout = options.timeout;
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Extended by
|
||||
|
||||
- [`TextEmbeddingFunction`](TextEmbeddingFunction.md)
|
||||
@@ -16,7 +33,7 @@ An embedding function that automatically creates vector representation for a giv
|
||||
|
||||
• **T** = `any`
|
||||
|
||||
• **M** *extends* `FunctionOptions` = `FunctionOptions`
|
||||
• **M** *extends* [`FunctionOptions`](../interfaces/FunctionOptions.md) = [`FunctionOptions`](../interfaces/FunctionOptions.md)
|
||||
|
||||
## Constructors
|
||||
|
||||
@@ -82,12 +99,33 @@ The datatype of the embeddings
|
||||
|
||||
***
|
||||
|
||||
### getSensitiveKeys()
|
||||
|
||||
```ts
|
||||
protected getSensitiveKeys(): string[]
|
||||
```
|
||||
|
||||
Provide a list of keys in the function options that should be treated as
|
||||
sensitive. If users pass raw values for these keys, they will be rejected.
|
||||
|
||||
#### Returns
|
||||
|
||||
`string`[]
|
||||
|
||||
***
|
||||
|
||||
### init()?
|
||||
|
||||
```ts
|
||||
optional init(): Promise<void>
|
||||
```
|
||||
|
||||
Optionally load any resources needed for the embedding function.
|
||||
|
||||
This method is called after the embedding function has been initialized
|
||||
but before any embeddings are computed. It is useful for loading local models
|
||||
or other resources that are needed for the embedding function to work.
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<`void`>
|
||||
@@ -108,6 +146,24 @@ The number of dimensions of the embeddings
|
||||
|
||||
***
|
||||
|
||||
### resolveVariables()
|
||||
|
||||
```ts
|
||||
protected resolveVariables(config): Partial<M>
|
||||
```
|
||||
|
||||
Apply variables to the config.
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **config**: `Partial`<`M`>
|
||||
|
||||
#### Returns
|
||||
|
||||
`Partial`<`M`>
|
||||
|
||||
***
|
||||
|
||||
### sourceField()
|
||||
|
||||
```ts
|
||||
@@ -118,53 +174,31 @@ sourceField is used in combination with `LanceSchema` to provide a declarative d
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **optionsOrDatatype**: `DataType`<`Type`, `any`> \| `Partial`<`FieldOptions`<`DataType`<`Type`, `any`>>>
|
||||
* **optionsOrDatatype**: `DataType`<`Type`, `any`> \| `Partial`<[`FieldOptions`](../interfaces/FieldOptions.md)<`DataType`<`Type`, `any`>>>
|
||||
The options for the field or the datatype
|
||||
|
||||
#### Returns
|
||||
|
||||
[`DataType`<`Type`, `any`>, `Map`<`string`, [`EmbeddingFunction`](EmbeddingFunction.md)<`any`, `FunctionOptions`>>]
|
||||
[`DataType`<`Type`, `any`>, `Map`<`string`, [`EmbeddingFunction`](EmbeddingFunction.md)<`any`, [`FunctionOptions`](../interfaces/FunctionOptions.md)>>]
|
||||
|
||||
#### See
|
||||
|
||||
lancedb.LanceSchema
|
||||
[LanceSchema](../functions/LanceSchema.md)
|
||||
|
||||
***
|
||||
|
||||
### toJSON()
|
||||
|
||||
```ts
|
||||
abstract toJSON(): Partial<M>
|
||||
toJSON(): Record<string, any>
|
||||
```
|
||||
|
||||
Convert the embedding function to a JSON object
|
||||
It is used to serialize the embedding function to the schema
|
||||
It's important that any object returned by this method contains all the necessary
|
||||
information to recreate the embedding function
|
||||
|
||||
It should return the same object that was passed to the constructor
|
||||
If it does not, the embedding function will not be able to be recreated, or could be recreated incorrectly
|
||||
Get the original arguments to the constructor, to serialize them so they
|
||||
can be used to recreate the embedding function later.
|
||||
|
||||
#### Returns
|
||||
|
||||
`Partial`<`M`>
|
||||
|
||||
#### Example
|
||||
|
||||
```ts
|
||||
class MyEmbeddingFunction extends EmbeddingFunction {
|
||||
constructor(options: {model: string, timeout: number}) {
|
||||
super();
|
||||
this.model = options.model;
|
||||
this.timeout = options.timeout;
|
||||
}
|
||||
toJSON() {
|
||||
return {
|
||||
model: this.model,
|
||||
timeout: this.timeout,
|
||||
};
|
||||
}
|
||||
```
|
||||
`Record`<`string`, `any`>
|
||||
|
||||
***
|
||||
|
||||
@@ -178,12 +212,13 @@ vectorField is used in combination with `LanceSchema` to provide a declarative d
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **optionsOrDatatype?**: `DataType`<`Type`, `any`> \| `Partial`<`FieldOptions`<`DataType`<`Type`, `any`>>>
|
||||
* **optionsOrDatatype?**: `DataType`<`Type`, `any`> \| `Partial`<[`FieldOptions`](../interfaces/FieldOptions.md)<`DataType`<`Type`, `any`>>>
|
||||
The options for the field
|
||||
|
||||
#### Returns
|
||||
|
||||
[`DataType`<`Type`, `any`>, `Map`<`string`, [`EmbeddingFunction`](EmbeddingFunction.md)<`any`, `FunctionOptions`>>]
|
||||
[`DataType`<`Type`, `any`>, `Map`<`string`, [`EmbeddingFunction`](EmbeddingFunction.md)<`any`, [`FunctionOptions`](../interfaces/FunctionOptions.md)>>]
|
||||
|
||||
#### See
|
||||
|
||||
lancedb.LanceSchema
|
||||
[LanceSchema](../functions/LanceSchema.md)
|
||||
|
||||
@@ -51,7 +51,7 @@ Fetch an embedding function by name
|
||||
|
||||
#### Type Parameters
|
||||
|
||||
• **T** *extends* [`EmbeddingFunction`](EmbeddingFunction.md)<`unknown`, `FunctionOptions`>
|
||||
• **T** *extends* [`EmbeddingFunction`](EmbeddingFunction.md)<`unknown`, [`FunctionOptions`](../interfaces/FunctionOptions.md)>
|
||||
|
||||
#### Parameters
|
||||
|
||||
@@ -60,7 +60,7 @@ Fetch an embedding function by name
|
||||
|
||||
#### Returns
|
||||
|
||||
`undefined` \| `EmbeddingFunctionCreate`<`T`>
|
||||
`undefined` \| [`EmbeddingFunctionCreate`](../interfaces/EmbeddingFunctionCreate.md)<`T`>
|
||||
|
||||
***
|
||||
|
||||
@@ -80,6 +80,28 @@ getTableMetadata(functions): Map<string, string>
|
||||
|
||||
***
|
||||
|
||||
### getVar()
|
||||
|
||||
```ts
|
||||
getVar(name): undefined | string
|
||||
```
|
||||
|
||||
Get a variable.
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **name**: `string`
|
||||
|
||||
#### Returns
|
||||
|
||||
`undefined` \| `string`
|
||||
|
||||
#### See
|
||||
|
||||
[setVar](EmbeddingFunctionRegistry.md#setvar)
|
||||
|
||||
***
|
||||
|
||||
### length()
|
||||
|
||||
```ts
|
||||
@@ -104,7 +126,7 @@ Register an embedding function
|
||||
|
||||
#### Type Parameters
|
||||
|
||||
• **T** *extends* `EmbeddingFunctionConstructor`<[`EmbeddingFunction`](EmbeddingFunction.md)<`any`, `FunctionOptions`>> = `EmbeddingFunctionConstructor`<[`EmbeddingFunction`](EmbeddingFunction.md)<`any`, `FunctionOptions`>>
|
||||
• **T** *extends* [`EmbeddingFunctionConstructor`](../interfaces/EmbeddingFunctionConstructor.md)<[`EmbeddingFunction`](EmbeddingFunction.md)<`any`, [`FunctionOptions`](../interfaces/FunctionOptions.md)>> = [`EmbeddingFunctionConstructor`](../interfaces/EmbeddingFunctionConstructor.md)<[`EmbeddingFunction`](EmbeddingFunction.md)<`any`, [`FunctionOptions`](../interfaces/FunctionOptions.md)>>
|
||||
|
||||
#### Parameters
|
||||
|
||||
@@ -145,3 +167,31 @@ reset the registry to the initial state
|
||||
#### Returns
|
||||
|
||||
`void`
|
||||
|
||||
***
|
||||
|
||||
### setVar()
|
||||
|
||||
```ts
|
||||
setVar(name, value): void
|
||||
```
|
||||
|
||||
Set a variable. These can be accessed in the embedding function
|
||||
configuration using the syntax `$var:variable_name`. If they are not
|
||||
set, an error will be thrown letting you know which key is unset. If you
|
||||
want to supply a default value, you can add an additional part in the
|
||||
configuration like so: `$var:variable_name:default_value`. Default values
|
||||
can be used for runtime configurations that are not sensitive, such as
|
||||
whether to use a GPU for inference.
|
||||
|
||||
The name must not contain colons. The default value can contain colons.
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **name**: `string`
|
||||
|
||||
* **value**: `string`
|
||||
|
||||
#### Returns
|
||||
|
||||
`void`
|
||||
|
||||
@@ -14,7 +14,7 @@ an abstract class for implementing embedding functions that take text as input
|
||||
|
||||
## Type Parameters
|
||||
|
||||
• **M** *extends* `FunctionOptions` = `FunctionOptions`
|
||||
• **M** *extends* [`FunctionOptions`](../interfaces/FunctionOptions.md) = [`FunctionOptions`](../interfaces/FunctionOptions.md)
|
||||
|
||||
## Constructors
|
||||
|
||||
@@ -114,12 +114,37 @@ abstract generateEmbeddings(texts, ...args): Promise<number[][] | Float32Array[]
|
||||
|
||||
***
|
||||
|
||||
### getSensitiveKeys()
|
||||
|
||||
```ts
|
||||
protected getSensitiveKeys(): string[]
|
||||
```
|
||||
|
||||
Provide a list of keys in the function options that should be treated as
|
||||
sensitive. If users pass raw values for these keys, they will be rejected.
|
||||
|
||||
#### Returns
|
||||
|
||||
`string`[]
|
||||
|
||||
#### Inherited from
|
||||
|
||||
[`EmbeddingFunction`](EmbeddingFunction.md).[`getSensitiveKeys`](EmbeddingFunction.md#getsensitivekeys)
|
||||
|
||||
***
|
||||
|
||||
### init()?
|
||||
|
||||
```ts
|
||||
optional init(): Promise<void>
|
||||
```
|
||||
|
||||
Optionally load any resources needed for the embedding function.
|
||||
|
||||
This method is called after the embedding function has been initialized
|
||||
but before any embeddings are computed. It is useful for loading local models
|
||||
or other resources that are needed for the embedding function to work.
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<`void`>
|
||||
@@ -148,6 +173,28 @@ The number of dimensions of the embeddings
|
||||
|
||||
***
|
||||
|
||||
### resolveVariables()
|
||||
|
||||
```ts
|
||||
protected resolveVariables(config): Partial<M>
|
||||
```
|
||||
|
||||
Apply variables to the config.
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **config**: `Partial`<`M`>
|
||||
|
||||
#### Returns
|
||||
|
||||
`Partial`<`M`>
|
||||
|
||||
#### Inherited from
|
||||
|
||||
[`EmbeddingFunction`](EmbeddingFunction.md).[`resolveVariables`](EmbeddingFunction.md#resolvevariables)
|
||||
|
||||
***
|
||||
|
||||
### sourceField()
|
||||
|
||||
```ts
|
||||
@@ -158,11 +205,11 @@ sourceField is used in combination with `LanceSchema` to provide a declarative d
|
||||
|
||||
#### Returns
|
||||
|
||||
[`DataType`<`Type`, `any`>, `Map`<`string`, [`EmbeddingFunction`](EmbeddingFunction.md)<`any`, `FunctionOptions`>>]
|
||||
[`DataType`<`Type`, `any`>, `Map`<`string`, [`EmbeddingFunction`](EmbeddingFunction.md)<`any`, [`FunctionOptions`](../interfaces/FunctionOptions.md)>>]
|
||||
|
||||
#### See
|
||||
|
||||
lancedb.LanceSchema
|
||||
[LanceSchema](../functions/LanceSchema.md)
|
||||
|
||||
#### Overrides
|
||||
|
||||
@@ -173,37 +220,15 @@ lancedb.LanceSchema
|
||||
### toJSON()
|
||||
|
||||
```ts
|
||||
abstract toJSON(): Partial<M>
|
||||
toJSON(): Record<string, any>
|
||||
```
|
||||
|
||||
Convert the embedding function to a JSON object
|
||||
It is used to serialize the embedding function to the schema
|
||||
It's important that any object returned by this method contains all the necessary
|
||||
information to recreate the embedding function
|
||||
|
||||
It should return the same object that was passed to the constructor
|
||||
If it does not, the embedding function will not be able to be recreated, or could be recreated incorrectly
|
||||
Get the original arguments to the constructor, to serialize them so they
|
||||
can be used to recreate the embedding function later.
|
||||
|
||||
#### Returns
|
||||
|
||||
`Partial`<`M`>
|
||||
|
||||
#### Example
|
||||
|
||||
```ts
|
||||
class MyEmbeddingFunction extends EmbeddingFunction {
|
||||
constructor(options: {model: string, timeout: number}) {
|
||||
super();
|
||||
this.model = options.model;
|
||||
this.timeout = options.timeout;
|
||||
}
|
||||
toJSON() {
|
||||
return {
|
||||
model: this.model,
|
||||
timeout: this.timeout,
|
||||
};
|
||||
}
|
||||
```
|
||||
`Record`<`string`, `any`>
|
||||
|
||||
#### Inherited from
|
||||
|
||||
@@ -221,15 +246,16 @@ vectorField is used in combination with `LanceSchema` to provide a declarative d
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **optionsOrDatatype?**: `DataType`<`Type`, `any`> \| `Partial`<`FieldOptions`<`DataType`<`Type`, `any`>>>
|
||||
* **optionsOrDatatype?**: `DataType`<`Type`, `any`> \| `Partial`<[`FieldOptions`](../interfaces/FieldOptions.md)<`DataType`<`Type`, `any`>>>
|
||||
The options for the field
|
||||
|
||||
#### Returns
|
||||
|
||||
[`DataType`<`Type`, `any`>, `Map`<`string`, [`EmbeddingFunction`](EmbeddingFunction.md)<`any`, `FunctionOptions`>>]
|
||||
[`DataType`<`Type`, `any`>, `Map`<`string`, [`EmbeddingFunction`](EmbeddingFunction.md)<`any`, [`FunctionOptions`](../interfaces/FunctionOptions.md)>>]
|
||||
|
||||
#### See
|
||||
|
||||
lancedb.LanceSchema
|
||||
[LanceSchema](../functions/LanceSchema.md)
|
||||
|
||||
#### Inherited from
|
||||
|
||||
|
||||
@@ -14,7 +14,7 @@ Create a schema with embedding functions.
|
||||
|
||||
## Parameters
|
||||
|
||||
* **fields**: `Record`<`string`, `object` \| [`object`, `Map`<`string`, [`EmbeddingFunction`](../classes/EmbeddingFunction.md)<`any`, `FunctionOptions`>>]>
|
||||
* **fields**: `Record`<`string`, `object` \| [`object`, `Map`<`string`, [`EmbeddingFunction`](../classes/EmbeddingFunction.md)<`any`, [`FunctionOptions`](../interfaces/FunctionOptions.md)>>]>
|
||||
|
||||
## Returns
|
||||
|
||||
|
||||
@@ -20,7 +20,7 @@ function register(name?): (ctor) => any
|
||||
|
||||
### Parameters
|
||||
|
||||
* **ctor**: `EmbeddingFunctionConstructor`<[`EmbeddingFunction`](../classes/EmbeddingFunction.md)<`any`, `FunctionOptions`>>
|
||||
* **ctor**: [`EmbeddingFunctionConstructor`](../interfaces/EmbeddingFunctionConstructor.md)<[`EmbeddingFunction`](../classes/EmbeddingFunction.md)<`any`, [`FunctionOptions`](../interfaces/FunctionOptions.md)>>
|
||||
|
||||
### Returns
|
||||
|
||||
|
||||
@@ -0,0 +1,27 @@
|
||||
[**@lancedb/lancedb**](../../../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../../../globals.md) / [embedding](../README.md) / EmbeddingFunctionConstructor
|
||||
|
||||
# Interface: EmbeddingFunctionConstructor<T>
|
||||
|
||||
## Type Parameters
|
||||
|
||||
• **T** *extends* [`EmbeddingFunction`](../classes/EmbeddingFunction.md) = [`EmbeddingFunction`](../classes/EmbeddingFunction.md)
|
||||
|
||||
## Constructors
|
||||
|
||||
### new EmbeddingFunctionConstructor()
|
||||
|
||||
```ts
|
||||
new EmbeddingFunctionConstructor(modelOptions?): T
|
||||
```
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **modelOptions?**: `T`\[`"TOptions"`\]
|
||||
|
||||
#### Returns
|
||||
|
||||
`T`
|
||||
@@ -0,0 +1,27 @@
|
||||
[**@lancedb/lancedb**](../../../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../../../globals.md) / [embedding](../README.md) / EmbeddingFunctionCreate
|
||||
|
||||
# Interface: EmbeddingFunctionCreate<T>
|
||||
|
||||
## Type Parameters
|
||||
|
||||
• **T** *extends* [`EmbeddingFunction`](../classes/EmbeddingFunction.md)
|
||||
|
||||
## Methods
|
||||
|
||||
### create()
|
||||
|
||||
```ts
|
||||
create(options?): CreateReturnType<T>
|
||||
```
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **options?**: `T`\[`"TOptions"`\]
|
||||
|
||||
#### Returns
|
||||
|
||||
[`CreateReturnType`](../type-aliases/CreateReturnType.md)<`T`>
|
||||
27
docs/src/js/namespaces/embedding/interfaces/FieldOptions.md
Normal file
27
docs/src/js/namespaces/embedding/interfaces/FieldOptions.md
Normal file
@@ -0,0 +1,27 @@
|
||||
[**@lancedb/lancedb**](../../../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../../../globals.md) / [embedding](../README.md) / FieldOptions
|
||||
|
||||
# Interface: FieldOptions<T>
|
||||
|
||||
## Type Parameters
|
||||
|
||||
• **T** *extends* `DataType` = `DataType`
|
||||
|
||||
## Properties
|
||||
|
||||
### datatype
|
||||
|
||||
```ts
|
||||
datatype: T;
|
||||
```
|
||||
|
||||
***
|
||||
|
||||
### dims?
|
||||
|
||||
```ts
|
||||
optional dims: number;
|
||||
```
|
||||
@@ -0,0 +1,13 @@
|
||||
[**@lancedb/lancedb**](../../../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../../../globals.md) / [embedding](../README.md) / FunctionOptions
|
||||
|
||||
# Interface: FunctionOptions
|
||||
|
||||
Options for a given embedding function
|
||||
|
||||
## Indexable
|
||||
|
||||
\[`key`: `string`\]: `any`
|
||||
@@ -0,0 +1,15 @@
|
||||
[**@lancedb/lancedb**](../../../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../../../globals.md) / [embedding](../README.md) / CreateReturnType
|
||||
|
||||
# Type Alias: CreateReturnType<T>
|
||||
|
||||
```ts
|
||||
type CreateReturnType<T>: T extends object ? Promise<T> : T;
|
||||
```
|
||||
|
||||
## Type Parameters
|
||||
|
||||
• **T**
|
||||
@@ -8,24 +8,6 @@
|
||||
|
||||
Reranks the results using the Reciprocal Rank Fusion (RRF) algorithm.
|
||||
|
||||
Internally this uses the Rust implementation
|
||||
|
||||
## Constructors
|
||||
|
||||
### new RRFReranker()
|
||||
|
||||
```ts
|
||||
new RRFReranker(inner): RRFReranker
|
||||
```
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **inner**: `RrfReranker`
|
||||
|
||||
#### Returns
|
||||
|
||||
[`RRFReranker`](RRFReranker.md)
|
||||
|
||||
## Methods
|
||||
|
||||
### rerankHybrid()
|
||||
|
||||
11
docs/src/js/type-aliases/DataLike.md
Normal file
11
docs/src/js/type-aliases/DataLike.md
Normal file
@@ -0,0 +1,11 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / DataLike
|
||||
|
||||
# Type Alias: DataLike
|
||||
|
||||
```ts
|
||||
type DataLike: Data | object;
|
||||
```
|
||||
11
docs/src/js/type-aliases/FieldLike.md
Normal file
11
docs/src/js/type-aliases/FieldLike.md
Normal file
@@ -0,0 +1,11 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / FieldLike
|
||||
|
||||
# Type Alias: FieldLike
|
||||
|
||||
```ts
|
||||
type FieldLike: Field | object;
|
||||
```
|
||||
19
docs/src/js/type-aliases/IntoSql.md
Normal file
19
docs/src/js/type-aliases/IntoSql.md
Normal file
@@ -0,0 +1,19 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / IntoSql
|
||||
|
||||
# Type Alias: IntoSql
|
||||
|
||||
```ts
|
||||
type IntoSql:
|
||||
| string
|
||||
| number
|
||||
| boolean
|
||||
| null
|
||||
| Date
|
||||
| ArrayBufferLike
|
||||
| Buffer
|
||||
| IntoSql[];
|
||||
```
|
||||
11
docs/src/js/type-aliases/IntoVector.md
Normal file
11
docs/src/js/type-aliases/IntoVector.md
Normal file
@@ -0,0 +1,11 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / IntoVector
|
||||
|
||||
# Type Alias: IntoVector
|
||||
|
||||
```ts
|
||||
type IntoVector: Float32Array | Float64Array | number[] | Promise<Float32Array | Float64Array | number[]>;
|
||||
```
|
||||
11
docs/src/js/type-aliases/RecordBatchLike.md
Normal file
11
docs/src/js/type-aliases/RecordBatchLike.md
Normal file
@@ -0,0 +1,11 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / RecordBatchLike
|
||||
|
||||
# Type Alias: RecordBatchLike
|
||||
|
||||
```ts
|
||||
type RecordBatchLike: RecordBatch | object;
|
||||
```
|
||||
11
docs/src/js/type-aliases/SchemaLike.md
Normal file
11
docs/src/js/type-aliases/SchemaLike.md
Normal file
@@ -0,0 +1,11 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / SchemaLike
|
||||
|
||||
# Type Alias: SchemaLike
|
||||
|
||||
```ts
|
||||
type SchemaLike: Schema | object;
|
||||
```
|
||||
11
docs/src/js/type-aliases/TableLike.md
Normal file
11
docs/src/js/type-aliases/TableLike.md
Normal file
@@ -0,0 +1,11 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / TableLike
|
||||
|
||||
# Type Alias: TableLike
|
||||
|
||||
```ts
|
||||
type TableLike: ArrowTable | object;
|
||||
```
|
||||
@@ -9,24 +9,51 @@ LanceDB supports [Polars](https://github.com/pola-rs/polars), a blazingly fast D
|
||||
|
||||
First, we connect to a LanceDB database.
|
||||
|
||||
=== "Sync API"
|
||||
|
||||
```py
|
||||
--8<-- "python/python/tests/docs/test_python.py:import-lancedb"
|
||||
--8<-- "python/python/tests/docs/test_python.py:connect_to_lancedb"
|
||||
```
|
||||
|
||||
=== "Async API"
|
||||
|
||||
```py
|
||||
--8<-- "python/python/tests/docs/test_python.py:import-lancedb"
|
||||
--8<-- "python/python/tests/docs/test_python.py:connect_to_lancedb_async"
|
||||
```
|
||||
|
||||
|
||||
We can load a Polars `DataFrame` to LanceDB directly.
|
||||
|
||||
=== "Sync API"
|
||||
|
||||
```py
|
||||
--8<-- "python/python/tests/docs/test_python.py:import-polars"
|
||||
--8<-- "python/python/tests/docs/test_python.py:create_table_polars"
|
||||
```
|
||||
|
||||
=== "Async API"
|
||||
|
||||
```py
|
||||
--8<-- "python/python/tests/docs/test_python.py:import-polars"
|
||||
--8<-- "python/python/tests/docs/test_python.py:create_table_polars_async"
|
||||
```
|
||||
|
||||
We can now perform similarity search via the LanceDB Python API.
|
||||
|
||||
=== "Sync API"
|
||||
|
||||
```py
|
||||
--8<-- "python/python/tests/docs/test_python.py:vector_search_polars"
|
||||
```
|
||||
|
||||
=== "Async API"
|
||||
|
||||
```py
|
||||
--8<-- "python/python/tests/docs/test_python.py:vector_search_polars_async"
|
||||
```
|
||||
|
||||
In addition to the selected columns, LanceDB also returns a vector
|
||||
and also the `_distance` column which is the distance between the query
|
||||
vector and the returned vector.
|
||||
@@ -112,4 +139,3 @@ The reason it's beneficial to not convert the LanceDB Table
|
||||
to a DataFrame is because the table can potentially be way larger
|
||||
than memory, and Polars LazyFrames allow us to work with such
|
||||
larger-than-memory datasets by not loading it into memory all at once.
|
||||
|
||||
|
||||
@@ -2,14 +2,19 @@
|
||||
|
||||
[Pydantic](https://docs.pydantic.dev/latest/) is a data validation library in Python.
|
||||
LanceDB integrates with Pydantic for schema inference, data ingestion, and query result casting.
|
||||
Using [LanceModel][lancedb.pydantic.LanceModel], users can seamlessly
|
||||
integrate Pydantic with the rest of the LanceDB APIs.
|
||||
|
||||
## Schema
|
||||
```python
|
||||
|
||||
LanceDB supports to create Apache Arrow Schema from a
|
||||
[Pydantic BaseModel](https://docs.pydantic.dev/latest/api/main/#pydantic.main.BaseModel)
|
||||
via [pydantic_to_schema()](python.md#lancedb.pydantic.pydantic_to_schema) method.
|
||||
--8<-- "python/python/tests/docs/test_pydantic_integration.py:imports"
|
||||
|
||||
--8<-- "python/python/tests/docs/test_pydantic_integration.py:base_model"
|
||||
|
||||
--8<-- "python/python/tests/docs/test_pydantic_integration.py:set_url"
|
||||
--8<-- "python/python/tests/docs/test_pydantic_integration.py:base_example"
|
||||
```
|
||||
|
||||
::: lancedb.pydantic.pydantic_to_schema
|
||||
|
||||
## Vector Field
|
||||
|
||||
@@ -34,3 +39,9 @@ Current supported type conversions:
|
||||
| `list` | `pyarrow.List` |
|
||||
| `BaseModel` | `pyarrow.Struct` |
|
||||
| `Vector(n)` | `pyarrow.FixedSizeList(float32, n)` |
|
||||
|
||||
LanceDB supports to create Apache Arrow Schema from a
|
||||
[Pydantic BaseModel][pydantic.BaseModel]
|
||||
via [pydantic_to_schema()](python.md#lancedb.pydantic.pydantic_to_schema) method.
|
||||
|
||||
::: lancedb.pydantic.pydantic_to_schema
|
||||
|
||||
@@ -122,7 +122,7 @@ LanceDB supports binary vectors as a data type, and has the ability to search bi
|
||||
|
||||
=== "Python"
|
||||
|
||||
=== "sync API"
|
||||
=== "Sync API"
|
||||
|
||||
```python
|
||||
--8<-- "python/python/tests/docs/test_binary_vector.py:imports"
|
||||
@@ -130,7 +130,7 @@ LanceDB supports binary vectors as a data type, and has the ability to search bi
|
||||
--8<-- "python/python/tests/docs/test_binary_vector.py:sync_binary_vector"
|
||||
```
|
||||
|
||||
=== "async API"
|
||||
=== "Async API"
|
||||
|
||||
```python
|
||||
--8<-- "python/python/tests/docs/test_binary_vector.py:imports"
|
||||
@@ -153,7 +153,7 @@ The vector value type can be `float16`, `float32` or `float64`.
|
||||
|
||||
=== "Python"
|
||||
|
||||
=== "sync API"
|
||||
=== "Sync API"
|
||||
|
||||
```python
|
||||
--8<-- "python/python/tests/docs/test_multivector.py:imports"
|
||||
@@ -161,7 +161,7 @@ The vector value type can be `float16`, `float32` or `float64`.
|
||||
--8<-- "python/python/tests/docs/test_multivector.py:sync_multivector"
|
||||
```
|
||||
|
||||
=== "async API"
|
||||
=== "Async API"
|
||||
|
||||
```python
|
||||
--8<-- "python/python/tests/docs/test_multivector.py:imports"
|
||||
@@ -175,7 +175,7 @@ You can also search for vectors within a specific distance range from the query
|
||||
|
||||
=== "Python"
|
||||
|
||||
=== "sync API"
|
||||
=== "Sync API"
|
||||
|
||||
```python
|
||||
--8<-- "python/python/tests/docs/test_distance_range.py:imports"
|
||||
@@ -183,7 +183,7 @@ You can also search for vectors within a specific distance range from the query
|
||||
--8<-- "python/python/tests/docs/test_distance_range.py:sync_distance_range"
|
||||
```
|
||||
|
||||
=== "async API"
|
||||
=== "Async API"
|
||||
|
||||
```python
|
||||
--8<-- "python/python/tests/docs/test_distance_range.py:imports"
|
||||
|
||||
@@ -20,6 +20,7 @@ async function setup() {
|
||||
}
|
||||
|
||||
async () => {
|
||||
console.log("search_legacy.ts: start");
|
||||
await setup();
|
||||
|
||||
// --8<-- [start:search1]
|
||||
@@ -37,5 +38,5 @@ async () => {
|
||||
.execute();
|
||||
// --8<-- [end:search2]
|
||||
|
||||
console.log("search: done");
|
||||
console.log("search_legacy.ts: done");
|
||||
};
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import * as vectordb from "vectordb";
|
||||
|
||||
(async () => {
|
||||
console.log("sql_legacy.ts: start");
|
||||
const db = await vectordb.connect("data/sample-lancedb");
|
||||
|
||||
let data = [];
|
||||
@@ -34,5 +35,5 @@ import * as vectordb from "vectordb";
|
||||
await tbl.filter("id = 10").limit(10).execute();
|
||||
// --8<-- [end:sql_search]
|
||||
|
||||
console.log("SQL search: done");
|
||||
console.log("sql_legacy.ts: done");
|
||||
})();
|
||||
|
||||
@@ -11,9 +11,11 @@ excluded_globs = [
|
||||
"../src/examples/*.md",
|
||||
"../src/integrations/*.md",
|
||||
"../src/guides/tables.md",
|
||||
"../src/guides/tables/merge_insert.md",
|
||||
"../src/python/duckdb.md",
|
||||
"../src/python/pandas_and_pyarrow.md",
|
||||
"../src/python/polars_arrow.md",
|
||||
"../src/python/pydantic.md",
|
||||
"../src/embeddings/*.md",
|
||||
"../src/concepts/*.md",
|
||||
"../src/ann_indexes.md",
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
<parent>
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.15.1-beta.2</version>
|
||||
<version>0.18.0-beta.0</version>
|
||||
<relativePath>../pom.xml</relativePath>
|
||||
</parent>
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.15.1-beta.2</version>
|
||||
<version>0.18.0-beta.0</version>
|
||||
<packaging>pom</packaging>
|
||||
|
||||
<name>LanceDB Parent</name>
|
||||
|
||||
124
node/package-lock.json
generated
124
node/package-lock.json
generated
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "vectordb",
|
||||
"version": "0.15.1-beta.1",
|
||||
"version": "0.18.0-beta.0",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "vectordb",
|
||||
"version": "0.15.1-beta.1",
|
||||
"version": "0.18.0-beta.0",
|
||||
"cpu": [
|
||||
"x64",
|
||||
"arm64"
|
||||
@@ -52,14 +52,14 @@
|
||||
"uuid": "^9.0.0"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@lancedb/vectordb-darwin-arm64": "0.15.1-beta.1",
|
||||
"@lancedb/vectordb-darwin-x64": "0.15.1-beta.1",
|
||||
"@lancedb/vectordb-linux-arm64-gnu": "0.15.1-beta.1",
|
||||
"@lancedb/vectordb-linux-arm64-musl": "0.15.1-beta.1",
|
||||
"@lancedb/vectordb-linux-x64-gnu": "0.15.1-beta.1",
|
||||
"@lancedb/vectordb-linux-x64-musl": "0.15.1-beta.1",
|
||||
"@lancedb/vectordb-win32-arm64-msvc": "0.15.1-beta.1",
|
||||
"@lancedb/vectordb-win32-x64-msvc": "0.15.1-beta.1"
|
||||
"@lancedb/vectordb-darwin-arm64": "0.18.0-beta.0",
|
||||
"@lancedb/vectordb-darwin-x64": "0.18.0-beta.0",
|
||||
"@lancedb/vectordb-linux-arm64-gnu": "0.18.0-beta.0",
|
||||
"@lancedb/vectordb-linux-arm64-musl": "0.18.0-beta.0",
|
||||
"@lancedb/vectordb-linux-x64-gnu": "0.18.0-beta.0",
|
||||
"@lancedb/vectordb-linux-x64-musl": "0.18.0-beta.0",
|
||||
"@lancedb/vectordb-win32-arm64-msvc": "0.18.0-beta.0",
|
||||
"@lancedb/vectordb-win32-x64-msvc": "0.18.0-beta.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@apache-arrow/ts": "^14.0.2",
|
||||
@@ -329,6 +329,110 @@
|
||||
"@jridgewell/sourcemap-codec": "^1.4.10"
|
||||
}
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-darwin-arm64": {
|
||||
"version": "0.18.0-beta.0",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.18.0-beta.0.tgz",
|
||||
"integrity": "sha512-dLLgMPllYJOiRfPqkqkmoQu48RIa7K4dOF/qFP8Aex3zqeHE/0sFm3DYjtSFc6SR/6yT8u6Y9iFo2cQp5rCFJA==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
]
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-darwin-x64": {
|
||||
"version": "0.18.0-beta.0",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.18.0-beta.0.tgz",
|
||||
"integrity": "sha512-la0eauU0rzHO5eeVjBt8o/5UW4VzRYAuRA7nqUFLX5T6SWP5+UWjqusVVbWGz3ski+8uEX6VhlaFZP5uIJKGIg==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
]
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-linux-arm64-gnu": {
|
||||
"version": "0.18.0-beta.0",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.18.0-beta.0.tgz",
|
||||
"integrity": "sha512-AkXI/lB3yu1Di2G1lhilf89V6qPTppb13aAt+/6gU5/PSfA94y9VXD67D4WyvRbuQghJjDvAavMlWMrJc2NuMw==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
]
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-linux-arm64-musl": {
|
||||
"version": "0.18.0-beta.0",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-musl/-/vectordb-linux-arm64-musl-0.18.0-beta.0.tgz",
|
||||
"integrity": "sha512-kTVcJ4LA8w/7egY4m0EXOt8c1DeFUquVtyvexO+VzIFeeHfBkkrMI0DkE0CpHmk+gctkG7EY39jzjgLnPvppnw==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
]
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-linux-x64-gnu": {
|
||||
"version": "0.18.0-beta.0",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.18.0-beta.0.tgz",
|
||||
"integrity": "sha512-KbtIy5DkaWTsKENm5Q27hjovrR7FRuoHhl0wDJtO/2CUZYlrskjEIfcfkfA2CrEQesBug4s5jgsvNM4Wcp6zoA==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
]
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-linux-x64-musl": {
|
||||
"version": "0.18.0-beta.0",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-musl/-/vectordb-linux-x64-musl-0.18.0-beta.0.tgz",
|
||||
"integrity": "sha512-SF07gmoGVExcF5v+IE6kBbCbXJSDyTgC7QCt+MDS1NsgoQ9OH7IyH7r6HJu16tKflUOUKlUHnP0hQOPpv1fWpg==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
]
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-win32-arm64-msvc": {
|
||||
"version": "0.18.0-beta.0",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-arm64-msvc/-/vectordb-win32-arm64-msvc-0.18.0-beta.0.tgz",
|
||||
"integrity": "sha512-YYBuSBGDlxJgSI5gHjDmQo9sl05lAXfzil6QiKfgmUMsBtb2sT+GoUCgG6qzsfe99sWiTf+pMeWDsQgfrj9vNw==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"win32"
|
||||
]
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-win32-x64-msvc": {
|
||||
"version": "0.18.0-beta.0",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.18.0-beta.0.tgz",
|
||||
"integrity": "sha512-t9TXeUnMU7YbP+/nUJpStm75aWwUydZj2AK+G2XwDtQrQo4Xg7/NETEbBeogmIOHuidNQYia8jEeQCUon5/+Dw==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"win32"
|
||||
]
|
||||
},
|
||||
"node_modules/@neon-rs/cli": {
|
||||
"version": "0.0.160",
|
||||
"resolved": "https://registry.npmjs.org/@neon-rs/cli/-/cli-0.0.160.tgz",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "vectordb",
|
||||
"version": "0.15.1-beta.2",
|
||||
"version": "0.18.0-beta.0",
|
||||
"description": " Serverless, low-latency vector database for AI applications",
|
||||
"private": false,
|
||||
"main": "dist/index.js",
|
||||
@@ -92,13 +92,13 @@
|
||||
}
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@lancedb/vectordb-darwin-x64": "0.15.1-beta.2",
|
||||
"@lancedb/vectordb-darwin-arm64": "0.15.1-beta.2",
|
||||
"@lancedb/vectordb-linux-x64-gnu": "0.15.1-beta.2",
|
||||
"@lancedb/vectordb-linux-arm64-gnu": "0.15.1-beta.2",
|
||||
"@lancedb/vectordb-linux-x64-musl": "0.15.1-beta.2",
|
||||
"@lancedb/vectordb-linux-arm64-musl": "0.15.1-beta.2",
|
||||
"@lancedb/vectordb-win32-x64-msvc": "0.15.1-beta.2",
|
||||
"@lancedb/vectordb-win32-arm64-msvc": "0.15.1-beta.2"
|
||||
"@lancedb/vectordb-darwin-x64": "0.18.0-beta.0",
|
||||
"@lancedb/vectordb-darwin-arm64": "0.18.0-beta.0",
|
||||
"@lancedb/vectordb-linux-x64-gnu": "0.18.0-beta.0",
|
||||
"@lancedb/vectordb-linux-arm64-gnu": "0.18.0-beta.0",
|
||||
"@lancedb/vectordb-linux-x64-musl": "0.18.0-beta.0",
|
||||
"@lancedb/vectordb-linux-arm64-musl": "0.18.0-beta.0",
|
||||
"@lancedb/vectordb-win32-x64-msvc": "0.18.0-beta.0",
|
||||
"@lancedb/vectordb-win32-arm64-msvc": "0.18.0-beta.0"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -47,7 +47,8 @@ const {
|
||||
tableSchema,
|
||||
tableAddColumns,
|
||||
tableAlterColumns,
|
||||
tableDropColumns
|
||||
tableDropColumns,
|
||||
tableDropIndex
|
||||
// eslint-disable-next-line @typescript-eslint/no-var-requires
|
||||
} = require("../native.js");
|
||||
|
||||
@@ -604,6 +605,13 @@ export interface Table<T = number[]> {
|
||||
*/
|
||||
dropColumns(columnNames: string[]): Promise<void>
|
||||
|
||||
/**
|
||||
* Drop an index from the table
|
||||
*
|
||||
* @param indexName The name of the index to drop
|
||||
*/
|
||||
dropIndex(indexName: string): Promise<void>
|
||||
|
||||
/**
|
||||
* Instrument the behavior of this Table with middleware.
|
||||
*
|
||||
@@ -1206,6 +1214,10 @@ export class LocalTable<T = number[]> implements Table<T> {
|
||||
return tableDropColumns.call(this._tbl, columnNames);
|
||||
}
|
||||
|
||||
async dropIndex(indexName: string): Promise<void> {
|
||||
return tableDropIndex.call(this._tbl, indexName);
|
||||
}
|
||||
|
||||
withMiddleware(middleware: HttpMiddleware): Table<T> {
|
||||
return this;
|
||||
}
|
||||
|
||||
@@ -471,6 +471,18 @@ export class RemoteTable<T = number[]> implements Table<T> {
|
||||
)
|
||||
}
|
||||
}
|
||||
async dropIndex (index_name: string): Promise<void> {
|
||||
const res = await this._client.post(
|
||||
`/v1/table/${encodeURIComponent(this._name)}/index/${encodeURIComponent(index_name)}/drop/`
|
||||
)
|
||||
if (res.status !== 200) {
|
||||
throw new Error(
|
||||
`Server Error, status: ${res.status}, ` +
|
||||
// eslint-disable-next-line @typescript-eslint/restrict-template-expressions
|
||||
`message: ${res.statusText}: ${await res.body()}`
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
async countRows (filter?: string): Promise<number> {
|
||||
const result = await this._client.post(`/v1/table/${encodeURIComponent(this._name)}/count_rows/`, {
|
||||
|
||||
@@ -894,6 +894,27 @@ describe("LanceDB client", function () {
|
||||
expect(stats.distanceType).to.equal("l2");
|
||||
expect(stats.numIndices).to.equal(1);
|
||||
}).timeout(50_000);
|
||||
|
||||
// not yet implemented
|
||||
// it("can drop index", async function () {
|
||||
// const uri = await createTestDB(32, 300);
|
||||
// const con = await lancedb.connect(uri);
|
||||
// const table = await con.openTable("vectors");
|
||||
// await table.createIndex({
|
||||
// type: "ivf_pq",
|
||||
// column: "vector",
|
||||
// num_partitions: 2,
|
||||
// max_iters: 2,
|
||||
// num_sub_vectors: 2
|
||||
// });
|
||||
//
|
||||
// const indices = await table.listIndices();
|
||||
// expect(indices).to.have.lengthOf(1);
|
||||
// expect(indices[0].name).to.equal("vector_idx");
|
||||
//
|
||||
// await table.dropIndex("vector_idx");
|
||||
// expect(await table.listIndices()).to.have.lengthOf(0);
|
||||
// }).timeout(50_000);
|
||||
});
|
||||
|
||||
describe("when using a custom embedding function", function () {
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
[package]
|
||||
name = "lancedb-nodejs"
|
||||
edition.workspace = true
|
||||
version = "0.15.1-beta.2"
|
||||
version = "0.18.0-beta.0"
|
||||
license.workspace = true
|
||||
description.workspace = true
|
||||
repository.workspace = true
|
||||
|
||||
@@ -32,7 +32,7 @@ const results = await table.vectorSearch([0.1, 0.3]).limit(20).toArray();
|
||||
console.log(results);
|
||||
```
|
||||
|
||||
The [quickstart](../basic.md) contains a more complete example.
|
||||
The [quickstart](https://lancedb.github.io/lancedb/basic/) contains a more complete example.
|
||||
|
||||
## Development
|
||||
|
||||
|
||||
@@ -55,6 +55,7 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
|
||||
Float64,
|
||||
Struct,
|
||||
List,
|
||||
Int16,
|
||||
Int32,
|
||||
Int64,
|
||||
Float,
|
||||
@@ -108,13 +109,16 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
|
||||
false,
|
||||
),
|
||||
]);
|
||||
|
||||
const table = (await tableCreationMethod(
|
||||
records,
|
||||
recordsReversed,
|
||||
schema,
|
||||
// biome-ignore lint/suspicious/noExplicitAny: <explanation>
|
||||
)) as any;
|
||||
|
||||
// We expect deterministic ordering of the fields
|
||||
expect(table.schema.names).toEqual(schema.names);
|
||||
|
||||
schema.fields.forEach(
|
||||
(
|
||||
// biome-ignore lint/suspicious/noExplicitAny: <explanation>
|
||||
@@ -141,13 +145,13 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
|
||||
describe("The function makeArrowTable", function () {
|
||||
it("will use data types from a provided schema instead of inference", async function () {
|
||||
const schema = new Schema([
|
||||
new Field("a", new Int32()),
|
||||
new Field("b", new Float32()),
|
||||
new Field("a", new Int32(), false),
|
||||
new Field("b", new Float32(), true),
|
||||
new Field(
|
||||
"c",
|
||||
new FixedSizeList(3, new Field("item", new Float16())),
|
||||
),
|
||||
new Field("d", new Int64()),
|
||||
new Field("d", new Int64(), true),
|
||||
]);
|
||||
const table = makeArrowTable(
|
||||
[
|
||||
@@ -165,12 +169,15 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
|
||||
expect(actual.numRows).toBe(3);
|
||||
const actualSchema = actual.schema;
|
||||
expect(actualSchema).toEqual(schema);
|
||||
expect(table.getChild("a")?.toJSON()).toEqual([1, 4, 7]);
|
||||
expect(table.getChild("b")?.toJSON()).toEqual([2, 5, 8]);
|
||||
expect(table.getChild("d")?.toJSON()).toEqual([9n, 10n, null]);
|
||||
});
|
||||
|
||||
it("will assume the column `vector` is FixedSizeList<Float32> by default", async function () {
|
||||
const schema = new Schema([
|
||||
new Field("a", new Float(Precision.DOUBLE), true),
|
||||
new Field("b", new Float(Precision.DOUBLE), true),
|
||||
new Field("b", new Int64(), true),
|
||||
new Field(
|
||||
"vector",
|
||||
new FixedSizeList(
|
||||
@@ -181,9 +188,9 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
|
||||
),
|
||||
]);
|
||||
const table = makeArrowTable([
|
||||
{ a: 1, b: 2, vector: [1, 2, 3] },
|
||||
{ a: 4, b: 5, vector: [4, 5, 6] },
|
||||
{ a: 7, b: 8, vector: [7, 8, 9] },
|
||||
{ a: 1, b: 2n, vector: [1, 2, 3] },
|
||||
{ a: 4, b: 5n, vector: [4, 5, 6] },
|
||||
{ a: 7, b: 8n, vector: [7, 8, 9] },
|
||||
]);
|
||||
|
||||
const buf = await fromTableToBuffer(table);
|
||||
@@ -193,6 +200,19 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
|
||||
expect(actual.numRows).toBe(3);
|
||||
const actualSchema = actual.schema;
|
||||
expect(actualSchema).toEqual(schema);
|
||||
|
||||
expect(table.getChild("a")?.toJSON()).toEqual([1, 4, 7]);
|
||||
expect(table.getChild("b")?.toJSON()).toEqual([2n, 5n, 8n]);
|
||||
expect(
|
||||
table
|
||||
.getChild("vector")
|
||||
?.toJSON()
|
||||
.map((v) => v.toJSON()),
|
||||
).toEqual([
|
||||
[1, 2, 3],
|
||||
[4, 5, 6],
|
||||
[7, 8, 9],
|
||||
]);
|
||||
});
|
||||
|
||||
it("can support multiple vector columns", async function () {
|
||||
@@ -206,7 +226,7 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
|
||||
),
|
||||
new Field(
|
||||
"vec2",
|
||||
new FixedSizeList(3, new Field("item", new Float16(), true)),
|
||||
new FixedSizeList(3, new Field("item", new Float64(), true)),
|
||||
true,
|
||||
),
|
||||
]);
|
||||
@@ -219,7 +239,7 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
|
||||
{
|
||||
vectorColumns: {
|
||||
vec1: { type: new Float16() },
|
||||
vec2: { type: new Float16() },
|
||||
vec2: { type: new Float64() },
|
||||
},
|
||||
},
|
||||
);
|
||||
@@ -307,6 +327,53 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
|
||||
false,
|
||||
);
|
||||
});
|
||||
|
||||
it("will allow subsets of columns if nullable", async function () {
|
||||
const schema = new Schema([
|
||||
new Field("a", new Int64(), true),
|
||||
new Field(
|
||||
"s",
|
||||
new Struct([
|
||||
new Field("x", new Int32(), true),
|
||||
new Field("y", new Int32(), true),
|
||||
]),
|
||||
true,
|
||||
),
|
||||
new Field("d", new Int16(), true),
|
||||
]);
|
||||
|
||||
const table = makeArrowTable([{ a: 1n }], { schema });
|
||||
expect(table.numCols).toBe(1);
|
||||
expect(table.numRows).toBe(1);
|
||||
|
||||
const table2 = makeArrowTable([{ a: 1n, d: 2 }], { schema });
|
||||
expect(table2.numCols).toBe(2);
|
||||
|
||||
const table3 = makeArrowTable([{ s: { y: 3 } }], { schema });
|
||||
expect(table3.numCols).toBe(1);
|
||||
const expectedSchema = new Schema([
|
||||
new Field("s", new Struct([new Field("y", new Int32(), true)]), true),
|
||||
]);
|
||||
expect(table3.schema).toEqual(expectedSchema);
|
||||
});
|
||||
|
||||
it("will work even if columns are sparsely provided", async function () {
|
||||
const sparseRecords = [{ a: 1n }, { b: 2n }, { c: 3n }, { d: 4n }];
|
||||
const table = makeArrowTable(sparseRecords);
|
||||
expect(table.numCols).toBe(4);
|
||||
expect(table.numRows).toBe(4);
|
||||
|
||||
const schema = new Schema([
|
||||
new Field("a", new Int64(), true),
|
||||
new Field("b", new Int32(), true),
|
||||
new Field("c", new Int64(), true),
|
||||
new Field("d", new Int16(), true),
|
||||
]);
|
||||
const table2 = makeArrowTable(sparseRecords, { schema });
|
||||
expect(table2.numCols).toBe(4);
|
||||
expect(table2.numRows).toBe(4);
|
||||
expect(table2.schema).toEqual(schema);
|
||||
});
|
||||
});
|
||||
|
||||
class DummyEmbedding extends EmbeddingFunction<string> {
|
||||
|
||||
@@ -17,14 +17,14 @@ describe("when connecting", () => {
|
||||
it("should connect", async () => {
|
||||
const db = await connect(tmpDir.name);
|
||||
expect(db.display()).toBe(
|
||||
`NativeDatabase(uri=${tmpDir.name}, read_consistency_interval=None)`,
|
||||
`ListingDatabase(uri=${tmpDir.name}, read_consistency_interval=None)`,
|
||||
);
|
||||
});
|
||||
|
||||
it("should allow read consistency interval to be specified", async () => {
|
||||
const db = await connect(tmpDir.name, { readConsistencyInterval: 5 });
|
||||
expect(db.display()).toBe(
|
||||
`NativeDatabase(uri=${tmpDir.name}, read_consistency_interval=5s)`,
|
||||
`ListingDatabase(uri=${tmpDir.name}, read_consistency_interval=5s)`,
|
||||
);
|
||||
});
|
||||
});
|
||||
@@ -61,6 +61,26 @@ describe("given a connection", () => {
|
||||
await expect(tbl.countRows()).resolves.toBe(1);
|
||||
});
|
||||
|
||||
it("should be able to drop tables`", async () => {
|
||||
await db.createTable("test", [{ id: 1 }, { id: 2 }]);
|
||||
await db.createTable("test2", [{ id: 1 }, { id: 2 }]);
|
||||
await db.createTable("test3", [{ id: 1 }, { id: 2 }]);
|
||||
|
||||
await expect(db.tableNames()).resolves.toEqual(["test", "test2", "test3"]);
|
||||
|
||||
await db.dropTable("test2");
|
||||
|
||||
await expect(db.tableNames()).resolves.toEqual(["test", "test3"]);
|
||||
|
||||
await db.dropAllTables();
|
||||
|
||||
await expect(db.tableNames()).resolves.toEqual([]);
|
||||
|
||||
// Make sure we can still create more tables after dropping all
|
||||
|
||||
await db.createTable("test4", [{ id: 1 }, { id: 2 }]);
|
||||
});
|
||||
|
||||
it("should fail if creating table twice, unless overwrite is true", async () => {
|
||||
let tbl = await db.createTable("test", [{ id: 1 }, { id: 2 }]);
|
||||
await expect(tbl.countRows()).resolves.toBe(2);
|
||||
@@ -96,14 +116,15 @@ describe("given a connection", () => {
|
||||
const data = [...Array(10000).keys()].map((i) => ({ id: i }));
|
||||
|
||||
// Create in v1 mode
|
||||
let table = await db.createTable("test", data, { useLegacyFormat: true });
|
||||
let table = await db.createTable("test", data, {
|
||||
storageOptions: { newTableDataStorageVersion: "legacy" },
|
||||
});
|
||||
|
||||
const isV2 = async (table: Table) => {
|
||||
const data = await table
|
||||
.query()
|
||||
.limit(10000)
|
||||
.toArrow({ maxBatchLength: 100000 });
|
||||
console.log(data.batches.length);
|
||||
return data.batches.length < 5;
|
||||
};
|
||||
|
||||
@@ -122,7 +143,7 @@ describe("given a connection", () => {
|
||||
const schema = new Schema([new Field("id", new Float64(), true)]);
|
||||
|
||||
table = await db.createEmptyTable("test_v2_empty", schema, {
|
||||
useLegacyFormat: false,
|
||||
storageOptions: { newTableDataStorageVersion: "stable" },
|
||||
});
|
||||
|
||||
await table.add(data);
|
||||
|
||||
@@ -17,6 +17,8 @@ import {
|
||||
import { EmbeddingFunction, LanceSchema } from "../lancedb/embedding";
|
||||
import { getRegistry, register } from "../lancedb/embedding/registry";
|
||||
|
||||
const testOpenAIInteg = process.env.OPENAI_API_KEY == null ? test.skip : test;
|
||||
|
||||
describe("embedding functions", () => {
|
||||
let tmpDir: tmp.DirResult;
|
||||
beforeEach(() => {
|
||||
@@ -29,9 +31,6 @@ describe("embedding functions", () => {
|
||||
|
||||
it("should be able to create a table with an embedding function", async () => {
|
||||
class MockEmbeddingFunction extends EmbeddingFunction<string> {
|
||||
toJSON(): object {
|
||||
return {};
|
||||
}
|
||||
ndims() {
|
||||
return 3;
|
||||
}
|
||||
@@ -75,9 +74,6 @@ describe("embedding functions", () => {
|
||||
it("should be able to append and upsert using embedding function", async () => {
|
||||
@register()
|
||||
class MockEmbeddingFunction extends EmbeddingFunction<string> {
|
||||
toJSON(): object {
|
||||
return {};
|
||||
}
|
||||
ndims() {
|
||||
return 3;
|
||||
}
|
||||
@@ -143,9 +139,6 @@ describe("embedding functions", () => {
|
||||
it("should be able to create an empty table with an embedding function", async () => {
|
||||
@register()
|
||||
class MockEmbeddingFunction extends EmbeddingFunction<string> {
|
||||
toJSON(): object {
|
||||
return {};
|
||||
}
|
||||
ndims() {
|
||||
return 3;
|
||||
}
|
||||
@@ -194,9 +187,6 @@ describe("embedding functions", () => {
|
||||
it("should error when appending to a table with an unregistered embedding function", async () => {
|
||||
@register("mock")
|
||||
class MockEmbeddingFunction extends EmbeddingFunction<string> {
|
||||
toJSON(): object {
|
||||
return {};
|
||||
}
|
||||
ndims() {
|
||||
return 3;
|
||||
}
|
||||
@@ -241,13 +231,35 @@ describe("embedding functions", () => {
|
||||
`Function "mock" not found in registry`,
|
||||
);
|
||||
});
|
||||
|
||||
testOpenAIInteg("propagates variables through all methods", async () => {
|
||||
delete process.env.OPENAI_API_KEY;
|
||||
const registry = getRegistry();
|
||||
registry.setVar("openai_api_key", "sk-...");
|
||||
const func = registry.get("openai")?.create({
|
||||
model: "text-embedding-ada-002",
|
||||
apiKey: "$var:openai_api_key",
|
||||
}) as EmbeddingFunction;
|
||||
|
||||
const db = await connect("memory://");
|
||||
const wordsSchema = LanceSchema({
|
||||
text: func.sourceField(new Utf8()),
|
||||
vector: func.vectorField(),
|
||||
});
|
||||
const tbl = await db.createEmptyTable("words", wordsSchema, {
|
||||
mode: "overwrite",
|
||||
});
|
||||
await tbl.add([{ text: "hello world" }, { text: "goodbye world" }]);
|
||||
|
||||
const query = "greetings";
|
||||
const actual = (await tbl.search(query).limit(1).toArray())[0];
|
||||
expect(actual).toHaveProperty("text");
|
||||
});
|
||||
|
||||
test.each([new Float16(), new Float32(), new Float64()])(
|
||||
"should be able to provide manual embeddings with multiple float datatype",
|
||||
async (floatType) => {
|
||||
class MockEmbeddingFunction extends EmbeddingFunction<string> {
|
||||
toJSON(): object {
|
||||
return {};
|
||||
}
|
||||
ndims() {
|
||||
return 3;
|
||||
}
|
||||
@@ -292,10 +304,6 @@ describe("embedding functions", () => {
|
||||
async (floatType) => {
|
||||
@register("test1")
|
||||
class MockEmbeddingFunctionWithoutNDims extends EmbeddingFunction<string> {
|
||||
toJSON(): object {
|
||||
return {};
|
||||
}
|
||||
|
||||
embeddingDataType(): Float {
|
||||
return floatType;
|
||||
}
|
||||
@@ -310,9 +318,6 @@ describe("embedding functions", () => {
|
||||
}
|
||||
@register("test")
|
||||
class MockEmbeddingFunction extends EmbeddingFunction<string> {
|
||||
toJSON(): object {
|
||||
return {};
|
||||
}
|
||||
ndims() {
|
||||
return 3;
|
||||
}
|
||||
|
||||
@@ -11,7 +11,11 @@ import * as arrow18 from "apache-arrow-18";
|
||||
import * as tmp from "tmp";
|
||||
|
||||
import { connect } from "../lancedb";
|
||||
import { EmbeddingFunction, LanceSchema } from "../lancedb/embedding";
|
||||
import {
|
||||
EmbeddingFunction,
|
||||
FunctionOptions,
|
||||
LanceSchema,
|
||||
} from "../lancedb/embedding";
|
||||
import { getRegistry, register } from "../lancedb/embedding/registry";
|
||||
|
||||
describe.each([arrow15, arrow16, arrow17, arrow18])("LanceSchema", (arrow) => {
|
||||
@@ -39,11 +43,6 @@ describe.each([arrow15, arrow16, arrow17, arrow18])("Registry", (arrow) => {
|
||||
it("should register a new item to the registry", async () => {
|
||||
@register("mock-embedding")
|
||||
class MockEmbeddingFunction extends EmbeddingFunction<string> {
|
||||
toJSON(): object {
|
||||
return {
|
||||
someText: "hello",
|
||||
};
|
||||
}
|
||||
constructor() {
|
||||
super();
|
||||
}
|
||||
@@ -89,11 +88,6 @@ describe.each([arrow15, arrow16, arrow17, arrow18])("Registry", (arrow) => {
|
||||
});
|
||||
test("should error if registering with the same name", async () => {
|
||||
class MockEmbeddingFunction extends EmbeddingFunction<string> {
|
||||
toJSON(): object {
|
||||
return {
|
||||
someText: "hello",
|
||||
};
|
||||
}
|
||||
constructor() {
|
||||
super();
|
||||
}
|
||||
@@ -114,13 +108,9 @@ describe.each([arrow15, arrow16, arrow17, arrow18])("Registry", (arrow) => {
|
||||
});
|
||||
test("schema should contain correct metadata", async () => {
|
||||
class MockEmbeddingFunction extends EmbeddingFunction<string> {
|
||||
toJSON(): object {
|
||||
return {
|
||||
someText: "hello",
|
||||
};
|
||||
}
|
||||
constructor() {
|
||||
constructor(args: FunctionOptions = {}) {
|
||||
super();
|
||||
this.resolveVariables(args);
|
||||
}
|
||||
ndims() {
|
||||
return 3;
|
||||
@@ -132,7 +122,7 @@ describe.each([arrow15, arrow16, arrow17, arrow18])("Registry", (arrow) => {
|
||||
return data.map(() => [1, 2, 3]);
|
||||
}
|
||||
}
|
||||
const func = new MockEmbeddingFunction();
|
||||
const func = new MockEmbeddingFunction({ someText: "hello" });
|
||||
|
||||
const schema = LanceSchema({
|
||||
id: new arrow.Int32(),
|
||||
@@ -155,3 +145,79 @@ describe.each([arrow15, arrow16, arrow17, arrow18])("Registry", (arrow) => {
|
||||
expect(schema.metadata).toEqual(expectedMetadata);
|
||||
});
|
||||
});
|
||||
|
||||
describe("Registry.setVar", () => {
|
||||
const registry = getRegistry();
|
||||
|
||||
beforeEach(() => {
|
||||
@register("mock-embedding")
|
||||
// biome-ignore lint/correctness/noUnusedVariables :
|
||||
class MockEmbeddingFunction extends EmbeddingFunction<string> {
|
||||
constructor(optionsRaw: FunctionOptions = {}) {
|
||||
super();
|
||||
const options = this.resolveVariables(optionsRaw);
|
||||
|
||||
expect(optionsRaw["someKey"].startsWith("$var:someName")).toBe(true);
|
||||
expect(options["someKey"]).toBe("someValue");
|
||||
|
||||
if (options["secretKey"]) {
|
||||
expect(optionsRaw["secretKey"]).toBe("$var:secretKey");
|
||||
expect(options["secretKey"]).toBe("mySecret");
|
||||
}
|
||||
}
|
||||
async computeSourceEmbeddings(data: string[]) {
|
||||
return data.map(() => [1, 2, 3]);
|
||||
}
|
||||
embeddingDataType() {
|
||||
return new arrow18.Float32() as apiArrow.Float;
|
||||
}
|
||||
protected getSensitiveKeys() {
|
||||
return ["secretKey"];
|
||||
}
|
||||
}
|
||||
});
|
||||
afterEach(() => {
|
||||
registry.reset();
|
||||
});
|
||||
|
||||
it("Should error if the variable is not set", () => {
|
||||
console.log(registry.get("mock-embedding"));
|
||||
expect(() =>
|
||||
registry.get("mock-embedding")!.create({ someKey: "$var:someName" }),
|
||||
).toThrow('Variable "someName" not found');
|
||||
});
|
||||
|
||||
it("should use default values if not set", () => {
|
||||
registry
|
||||
.get("mock-embedding")!
|
||||
.create({ someKey: "$var:someName:someValue" });
|
||||
});
|
||||
|
||||
it("should set a variable that the embedding function understand", () => {
|
||||
registry.setVar("someName", "someValue");
|
||||
registry.get("mock-embedding")!.create({ someKey: "$var:someName" });
|
||||
});
|
||||
|
||||
it("should reject secrets that aren't passed as variables", () => {
|
||||
registry.setVar("someName", "someValue");
|
||||
expect(() =>
|
||||
registry
|
||||
.get("mock-embedding")!
|
||||
.create({ secretKey: "someValue", someKey: "$var:someName" }),
|
||||
).toThrow(
|
||||
'The key "secretKey" is sensitive and cannot be set directly. Please use the $var: syntax to set it.',
|
||||
);
|
||||
});
|
||||
|
||||
it("should not serialize secrets", () => {
|
||||
registry.setVar("someName", "someValue");
|
||||
registry.setVar("secretKey", "mySecret");
|
||||
const func = registry
|
||||
.get("mock-embedding")!
|
||||
.create({ secretKey: "$var:secretKey", someKey: "$var:someName" });
|
||||
expect(func.toJSON()).toEqual({
|
||||
secretKey: "$var:secretKey",
|
||||
someKey: "$var:someName",
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -104,4 +104,26 @@ describe("remote connection", () => {
|
||||
},
|
||||
);
|
||||
});
|
||||
|
||||
it("should pass on requested extra headers", async () => {
|
||||
await withMockDatabase(
|
||||
(req, res) => {
|
||||
expect(req.headers["x-my-header"]).toEqual("my-value");
|
||||
|
||||
const body = JSON.stringify({ tables: [] });
|
||||
res.writeHead(200, { "Content-Type": "application/json" }).end(body);
|
||||
},
|
||||
async (db) => {
|
||||
const tableNames = await db.tableNames();
|
||||
expect(tableNames).toEqual([]);
|
||||
},
|
||||
{
|
||||
clientConfig: {
|
||||
extraHeaders: {
|
||||
"x-my-header": "my-value",
|
||||
},
|
||||
},
|
||||
},
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -175,6 +175,8 @@ maybeDescribe("storage_options", () => {
|
||||
|
||||
tableNames = await db.tableNames();
|
||||
expect(tableNames).toEqual([]);
|
||||
|
||||
await db.dropAllTables();
|
||||
});
|
||||
|
||||
it("can configure encryption at connection and table level", async () => {
|
||||
@@ -210,6 +212,8 @@ maybeDescribe("storage_options", () => {
|
||||
await table.add([{ a: 2, b: 3 }]);
|
||||
|
||||
await bucket.assertAllEncrypted("test/table2.lance", kmsKey.keyId);
|
||||
|
||||
await db.dropAllTables();
|
||||
});
|
||||
});
|
||||
|
||||
@@ -298,5 +302,32 @@ maybeDescribe("DynamoDB Lock", () => {
|
||||
|
||||
const rowCount = await table.countRows();
|
||||
expect(rowCount).toBe(6);
|
||||
|
||||
await db.dropAllTables();
|
||||
});
|
||||
|
||||
it("clears dynamodb state after dropping all tables", async () => {
|
||||
const uri = `s3+ddb://${bucket.name}/test?ddbTableName=${commitTable.name}`;
|
||||
const db = await connect(uri, {
|
||||
storageOptions: CONFIG,
|
||||
readConsistencyInterval: 0,
|
||||
});
|
||||
|
||||
await db.createTable("foo", [{ a: 1, b: 2 }]);
|
||||
await db.createTable("bar", [{ a: 1, b: 2 }]);
|
||||
|
||||
let tableNames = await db.tableNames();
|
||||
expect(tableNames).toEqual(["bar", "foo"]);
|
||||
|
||||
await db.dropAllTables();
|
||||
tableNames = await db.tableNames();
|
||||
expect(tableNames).toEqual([]);
|
||||
|
||||
// We can create a new table with the same name as the one we dropped.
|
||||
await db.createTable("foo", [{ a: 1, b: 2 }]);
|
||||
tableNames = await db.tableNames();
|
||||
expect(tableNames).toEqual(["foo"]);
|
||||
|
||||
await db.dropAllTables();
|
||||
});
|
||||
});
|
||||
|
||||
@@ -253,6 +253,31 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
|
||||
const arrowTbl = await table.toArrow();
|
||||
expect(arrowTbl).toBeInstanceOf(ArrowTable);
|
||||
});
|
||||
|
||||
it("should be able to handle missing fields", async () => {
|
||||
const schema = new arrow.Schema([
|
||||
new arrow.Field("id", new arrow.Int32(), true),
|
||||
new arrow.Field("y", new arrow.Int32(), true),
|
||||
new arrow.Field("z", new arrow.Int64(), true),
|
||||
]);
|
||||
const db = await connect(tmpDir.name);
|
||||
const table = await db.createEmptyTable("testNull", schema);
|
||||
await table.add([{ id: 1, y: 2 }]);
|
||||
await table.add([{ id: 2 }]);
|
||||
|
||||
await table
|
||||
.mergeInsert("id")
|
||||
.whenNotMatchedInsertAll()
|
||||
.execute([
|
||||
{ id: 3, z: 3 },
|
||||
{ id: 4, z: 5 },
|
||||
]);
|
||||
|
||||
const res = await table.query().toArrow();
|
||||
expect(res.getChild("id")?.toJSON()).toEqual([1, 2, 3, 4]);
|
||||
expect(res.getChild("y")?.toJSON()).toEqual([2, null, null, null]);
|
||||
expect(res.getChild("z")?.toJSON()).toEqual([null, null, 3n, 5n]);
|
||||
});
|
||||
},
|
||||
);
|
||||
|
||||
@@ -641,11 +666,11 @@ describe("When creating an index", () => {
|
||||
expect(fs.readdirSync(indexDir)).toHaveLength(1);
|
||||
|
||||
for await (const r of tbl.query().where("id > 1").select(["id"])) {
|
||||
expect(r.numRows).toBe(10);
|
||||
expect(r.numRows).toBe(298);
|
||||
}
|
||||
// should also work with 'filter' alias
|
||||
for await (const r of tbl.query().filter("id > 1").select(["id"])) {
|
||||
expect(r.numRows).toBe(10);
|
||||
expect(r.numRows).toBe(298);
|
||||
}
|
||||
});
|
||||
|
||||
@@ -1013,9 +1038,6 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
|
||||
test("can search using a string", async () => {
|
||||
@register()
|
||||
class MockEmbeddingFunction extends EmbeddingFunction<string> {
|
||||
toJSON(): object {
|
||||
return {};
|
||||
}
|
||||
ndims() {
|
||||
return 1;
|
||||
}
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
import { expect, test } from "@jest/globals";
|
||||
// --8<-- [start:import]
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
import { VectorQuery } from "@lancedb/lancedb";
|
||||
import type { VectorQuery } from "@lancedb/lancedb";
|
||||
// --8<-- [end:import]
|
||||
import { withTempDirectory } from "./util.ts";
|
||||
|
||||
|
||||
@@ -117,7 +117,6 @@ test("basic table examples", async () => {
|
||||
// --8<-- [end:add_data]
|
||||
}
|
||||
|
||||
{
|
||||
// --8<-- [start:add_columns]
|
||||
await tbl.addColumns([
|
||||
{ name: "double_price", valueSql: "cast((price * 2) as Float)" },
|
||||
@@ -136,7 +135,6 @@ test("basic table examples", async () => {
|
||||
// --8<-- [start:drop_columns]
|
||||
await tbl.dropColumns(["dbl_price"]);
|
||||
// --8<-- [end:drop_columns]
|
||||
}
|
||||
|
||||
{
|
||||
// --8<-- [start:vector_search]
|
||||
|
||||
52
nodejs/examples/biome.json
Normal file
52
nodejs/examples/biome.json
Normal file
@@ -0,0 +1,52 @@
|
||||
{
|
||||
"$schema": "https://biomejs.dev/schemas/1.9.4/schema.json",
|
||||
"vcs": {
|
||||
"enabled": false,
|
||||
"clientKind": "git",
|
||||
"useIgnoreFile": false
|
||||
},
|
||||
"files": {
|
||||
"ignoreUnknown": false,
|
||||
"ignore": []
|
||||
},
|
||||
"formatter": {
|
||||
"enabled": true,
|
||||
"indentStyle": "space"
|
||||
},
|
||||
"organizeImports": {
|
||||
"enabled": true
|
||||
},
|
||||
"linter": {
|
||||
"enabled": true,
|
||||
"rules": {
|
||||
"recommended": true
|
||||
}
|
||||
},
|
||||
"javascript": {
|
||||
"formatter": {
|
||||
"quoteStyle": "double"
|
||||
}
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"include": ["*"],
|
||||
"linter": {
|
||||
"rules": {
|
||||
"style": {
|
||||
"noNonNullAssertion": "off"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"include": ["merge_insert.test.ts"],
|
||||
"linter": {
|
||||
"rules": {
|
||||
"style": {
|
||||
"useNamingConvention": "off"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -1,4 +1,7 @@
|
||||
import { FeatureExtractionPipeline, pipeline } from "@huggingface/transformers";
|
||||
import {
|
||||
type FeatureExtractionPipeline,
|
||||
pipeline,
|
||||
} from "@huggingface/transformers";
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
import { expect, test } from "@jest/globals";
|
||||
|
||||
@@ -43,12 +43,17 @@ test("custom embedding function", async () => {
|
||||
|
||||
@register("my_embedding")
|
||||
class MyEmbeddingFunction extends EmbeddingFunction<string> {
|
||||
toJSON(): object {
|
||||
return {};
|
||||
constructor(optionsRaw = {}) {
|
||||
super();
|
||||
const options = this.resolveVariables(optionsRaw);
|
||||
// Initialize using options
|
||||
}
|
||||
ndims() {
|
||||
return 3;
|
||||
}
|
||||
protected getSensitiveKeys(): string[] {
|
||||
return [];
|
||||
}
|
||||
embeddingDataType(): Float {
|
||||
return new Float32();
|
||||
}
|
||||
@@ -94,3 +99,14 @@ test("custom embedding function", async () => {
|
||||
expect(await table2.countRows()).toBe(2);
|
||||
});
|
||||
});
|
||||
|
||||
test("embedding function api_key", async () => {
|
||||
// --8<-- [start:register_secret]
|
||||
const registry = getRegistry();
|
||||
registry.setVar("api_key", "sk-...");
|
||||
|
||||
const func = registry.get("openai")!.create({
|
||||
apiKey: "$var:api_key",
|
||||
});
|
||||
// --8<-- [end:register_secret]
|
||||
});
|
||||
|
||||
@@ -42,4 +42,4 @@ test("full text search", async () => {
|
||||
expect(result.length).toBe(10);
|
||||
// --8<-- [end:full_text_search]
|
||||
});
|
||||
});
|
||||
}, 10_000);
|
||||
|
||||
68
nodejs/examples/merge_insert.test.ts
Normal file
68
nodejs/examples/merge_insert.test.ts
Normal file
@@ -0,0 +1,68 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
import { expect, test } from "@jest/globals";
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
|
||||
test("basic upsert", async () => {
|
||||
const db = await lancedb.connect("memory://");
|
||||
|
||||
// --8<-- [start:upsert_basic]
|
||||
const table = await db.createTable("users", [
|
||||
{ id: 0, name: "Alice" },
|
||||
{ id: 1, name: "Bob" },
|
||||
]);
|
||||
|
||||
const newUsers = [
|
||||
{ id: 1, name: "Bobby" },
|
||||
{ id: 2, name: "Charlie" },
|
||||
];
|
||||
await table
|
||||
.mergeInsert("id")
|
||||
.whenMatchedUpdateAll()
|
||||
.whenNotMatchedInsertAll()
|
||||
.execute(newUsers);
|
||||
|
||||
await table.countRows(); // 3
|
||||
// --8<-- [end:upsert_basic]
|
||||
expect(await table.countRows()).toBe(3);
|
||||
|
||||
// --8<-- [start:insert_if_not_exists]
|
||||
const table2 = await db.createTable("domains", [
|
||||
{ domain: "google.com", name: "Google" },
|
||||
{ domain: "github.com", name: "GitHub" },
|
||||
]);
|
||||
|
||||
const newDomains = [
|
||||
{ domain: "google.com", name: "Google" },
|
||||
{ domain: "facebook.com", name: "Facebook" },
|
||||
];
|
||||
await table2
|
||||
.mergeInsert("domain")
|
||||
.whenNotMatchedInsertAll()
|
||||
.execute(newDomains);
|
||||
await table2.countRows(); // 3
|
||||
// --8<-- [end:insert_if_not_exists]
|
||||
expect(await table2.countRows()).toBe(3);
|
||||
|
||||
// --8<-- [start:replace_range]
|
||||
const table3 = await db.createTable("chunks", [
|
||||
{ doc_id: 0, chunk_id: 0, text: "Hello" },
|
||||
{ doc_id: 0, chunk_id: 1, text: "World" },
|
||||
{ doc_id: 1, chunk_id: 0, text: "Foo" },
|
||||
{ doc_id: 1, chunk_id: 1, text: "Bar" },
|
||||
]);
|
||||
|
||||
const newChunks = [{ doc_id: 1, chunk_id: 0, text: "Baz" }];
|
||||
|
||||
await table3
|
||||
.mergeInsert(["doc_id", "chunk_id"])
|
||||
.whenMatchedUpdateAll()
|
||||
.whenNotMatchedInsertAll()
|
||||
.whenNotMatchedBySourceDelete({ where: "doc_id = 1" })
|
||||
.execute(newChunks);
|
||||
|
||||
await table3.countRows("doc_id = 1"); // 1
|
||||
// --8<-- [end:replace_range]
|
||||
expect(await table3.countRows("doc_id = 1")).toBe(1);
|
||||
});
|
||||
@@ -6,7 +6,7 @@ import { withTempDirectory } from "./util.ts";
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
import "@lancedb/lancedb/embedding/transformers";
|
||||
import { LanceSchema, getRegistry } from "@lancedb/lancedb/embedding";
|
||||
import { EmbeddingFunction } from "@lancedb/lancedb/embedding";
|
||||
import type { EmbeddingFunction } from "@lancedb/lancedb/embedding";
|
||||
import { Utf8 } from "apache-arrow";
|
||||
|
||||
test("full text search", async () => {
|
||||
@@ -58,6 +58,6 @@ test("full text search", async () => {
|
||||
const query = "How many bones are in the human body?";
|
||||
const actual = await tbl.search(query).limit(1).toArray();
|
||||
|
||||
expect(actual[0]["text"]).toBe("The human body has 206 bones.");
|
||||
expect(actual[0].text).toBe("The human body has 206 bones.");
|
||||
});
|
||||
}, 100_000);
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
import * as fs from "fs";
|
||||
import { tmpdir } from "os";
|
||||
import * as path from "path";
|
||||
import * as fs from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import * as path from "node:path";
|
||||
|
||||
export async function withTempDirectory(
|
||||
fn: (tempDir: string) => Promise<void>,
|
||||
|
||||
@@ -2,31 +2,37 @@
|
||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
import {
|
||||
Data as ArrowData,
|
||||
Table as ArrowTable,
|
||||
Binary,
|
||||
Bool,
|
||||
BufferType,
|
||||
DataType,
|
||||
Dictionary,
|
||||
Field,
|
||||
FixedSizeBinary,
|
||||
FixedSizeList,
|
||||
Float,
|
||||
Float32,
|
||||
Float64,
|
||||
Int,
|
||||
Int32,
|
||||
Int64,
|
||||
LargeBinary,
|
||||
List,
|
||||
Null,
|
||||
RecordBatch,
|
||||
RecordBatchFileReader,
|
||||
RecordBatchFileWriter,
|
||||
RecordBatchReader,
|
||||
RecordBatchStreamWriter,
|
||||
Schema,
|
||||
Struct,
|
||||
Utf8,
|
||||
Vector,
|
||||
makeVector as arrowMakeVector,
|
||||
makeBuilder,
|
||||
makeData,
|
||||
type makeTable,
|
||||
makeTable,
|
||||
vectorFromArray,
|
||||
} from "apache-arrow";
|
||||
import { Buffers } from "apache-arrow/data";
|
||||
@@ -236,8 +242,6 @@ export class MakeArrowTableOptions {
|
||||
* This function converts an array of Record<String, any> (row-major JS objects)
|
||||
* to an Arrow Table (a columnar structure)
|
||||
*
|
||||
* Note that it currently does not support nulls.
|
||||
*
|
||||
* If a schema is provided then it will be used to determine the resulting array
|
||||
* types. Fields will also be reordered to fit the order defined by the schema.
|
||||
*
|
||||
@@ -245,6 +249,9 @@ export class MakeArrowTableOptions {
|
||||
* will be controlled by the order of properties in the first record. If a type
|
||||
* is inferred it will always be nullable.
|
||||
*
|
||||
* If not all fields are found in the data, then a subset of the schema will be
|
||||
* returned.
|
||||
*
|
||||
* If the input is empty then a schema must be provided to create an empty table.
|
||||
*
|
||||
* When a schema is not specified then data types will be inferred. The inference
|
||||
@@ -252,11 +259,13 @@ export class MakeArrowTableOptions {
|
||||
*
|
||||
* - boolean => Bool
|
||||
* - number => Float64
|
||||
* - bigint => Int64
|
||||
* - String => Utf8
|
||||
* - Buffer => Binary
|
||||
* - Record<String, any> => Struct
|
||||
* - Array<any> => List
|
||||
* @example
|
||||
* ```ts
|
||||
* import { fromTableToBuffer, makeArrowTable } from "../arrow";
|
||||
* import { Field, FixedSizeList, Float16, Float32, Int32, Schema } from "apache-arrow";
|
||||
*
|
||||
@@ -278,43 +287,41 @@ export class MakeArrowTableOptions {
|
||||
* names and data types.
|
||||
*
|
||||
* ```ts
|
||||
*
|
||||
* const schema = new Schema([
|
||||
new Field("a", new Float64()),
|
||||
new Field("b", new Float64()),
|
||||
new Field(
|
||||
"vector",
|
||||
new FixedSizeList(3, new Field("item", new Float32()))
|
||||
),
|
||||
]);
|
||||
const table = makeArrowTable([
|
||||
{ a: 1, b: 2, vector: [1, 2, 3] },
|
||||
{ a: 4, b: 5, vector: [4, 5, 6] },
|
||||
{ a: 7, b: 8, vector: [7, 8, 9] },
|
||||
]);
|
||||
assert.deepEqual(table.schema, schema);
|
||||
* new Field("a", new Float64()),
|
||||
* new Field("b", new Float64()),
|
||||
* new Field(
|
||||
* "vector",
|
||||
* new FixedSizeList(3, new Field("item", new Float32()))
|
||||
* ),
|
||||
* ]);
|
||||
* const table = makeArrowTable([
|
||||
* { a: 1, b: 2, vector: [1, 2, 3] },
|
||||
* { a: 4, b: 5, vector: [4, 5, 6] },
|
||||
* { a: 7, b: 8, vector: [7, 8, 9] },
|
||||
* ]);
|
||||
* assert.deepEqual(table.schema, schema);
|
||||
* ```
|
||||
*
|
||||
* You can specify the vector column types and names using the options as well
|
||||
*
|
||||
* ```typescript
|
||||
*
|
||||
* ```ts
|
||||
* const schema = new Schema([
|
||||
new Field('a', new Float64()),
|
||||
new Field('b', new Float64()),
|
||||
new Field('vec1', new FixedSizeList(3, new Field('item', new Float16()))),
|
||||
new Field('vec2', new FixedSizeList(3, new Field('item', new Float16())))
|
||||
]);
|
||||
* new Field('a', new Float64()),
|
||||
* new Field('b', new Float64()),
|
||||
* new Field('vec1', new FixedSizeList(3, new Field('item', new Float16()))),
|
||||
* new Field('vec2', new FixedSizeList(3, new Field('item', new Float16())))
|
||||
* ]);
|
||||
* const table = makeArrowTable([
|
||||
{ a: 1, b: 2, vec1: [1, 2, 3], vec2: [2, 4, 6] },
|
||||
{ a: 4, b: 5, vec1: [4, 5, 6], vec2: [8, 10, 12] },
|
||||
{ a: 7, b: 8, vec1: [7, 8, 9], vec2: [14, 16, 18] }
|
||||
], {
|
||||
vectorColumns: {
|
||||
vec1: { type: new Float16() },
|
||||
vec2: { type: new Float16() }
|
||||
}
|
||||
}
|
||||
* { a: 1, b: 2, vec1: [1, 2, 3], vec2: [2, 4, 6] },
|
||||
* { a: 4, b: 5, vec1: [4, 5, 6], vec2: [8, 10, 12] },
|
||||
* { a: 7, b: 8, vec1: [7, 8, 9], vec2: [14, 16, 18] }
|
||||
* ], {
|
||||
* vectorColumns: {
|
||||
* vec1: { type: new Float16() },
|
||||
* vec2: { type: new Float16() }
|
||||
* }
|
||||
* }
|
||||
* assert.deepEqual(table.schema, schema)
|
||||
* ```
|
||||
*/
|
||||
@@ -323,126 +330,316 @@ export function makeArrowTable(
|
||||
options?: Partial<MakeArrowTableOptions>,
|
||||
metadata?: Map<string, string>,
|
||||
): ArrowTable {
|
||||
const opt = new MakeArrowTableOptions(options !== undefined ? options : {});
|
||||
let schema: Schema | undefined = undefined;
|
||||
if (opt.schema !== undefined && opt.schema !== null) {
|
||||
schema = sanitizeSchema(opt.schema);
|
||||
schema = validateSchemaEmbeddings(
|
||||
schema as Schema,
|
||||
data,
|
||||
options?.embeddingFunction,
|
||||
);
|
||||
}
|
||||
|
||||
let schemaMetadata = schema?.metadata || new Map<string, string>();
|
||||
if (metadata !== undefined) {
|
||||
schemaMetadata = new Map([...schemaMetadata, ...metadata]);
|
||||
}
|
||||
|
||||
if (
|
||||
data.length === 0 &&
|
||||
(options?.schema === undefined || options?.schema === null)
|
||||
) {
|
||||
throw new Error("At least one record or a schema needs to be provided");
|
||||
} else if (data.length === 0) {
|
||||
if (schema === undefined) {
|
||||
throw new Error("A schema must be provided if data is empty");
|
||||
} else {
|
||||
schema = new Schema(schema.fields, schemaMetadata);
|
||||
return new ArrowTable(schema);
|
||||
}
|
||||
}
|
||||
|
||||
const opt = new MakeArrowTableOptions(options !== undefined ? options : {});
|
||||
if (opt.schema !== undefined && opt.schema !== null) {
|
||||
opt.schema = sanitizeSchema(opt.schema);
|
||||
opt.schema = validateSchemaEmbeddings(
|
||||
opt.schema as Schema,
|
||||
data,
|
||||
options?.embeddingFunction,
|
||||
);
|
||||
}
|
||||
const columns: Record<string, Vector> = {};
|
||||
// TODO: sample dataset to find missing columns
|
||||
// Prefer the field ordering of the schema, if present
|
||||
const columnNames =
|
||||
opt.schema != null ? (opt.schema.names as string[]) : Object.keys(data[0]);
|
||||
for (const colName of columnNames) {
|
||||
if (
|
||||
data.length !== 0 &&
|
||||
!Object.prototype.hasOwnProperty.call(data[0], colName)
|
||||
) {
|
||||
// The field is present in the schema, but not in the data, skip it
|
||||
continue;
|
||||
}
|
||||
// Extract a single column from the records (transpose from row-major to col-major)
|
||||
let values = data.map((datum) => datum[colName]);
|
||||
let inferredSchema = inferSchema(data, schema, opt);
|
||||
inferredSchema = new Schema(inferredSchema.fields, schemaMetadata);
|
||||
|
||||
// By default (type === undefined) arrow will infer the type from the JS type
|
||||
let type;
|
||||
if (opt.schema !== undefined) {
|
||||
// If there is a schema provided, then use that for the type instead
|
||||
type = opt.schema?.fields.filter((f) => f.name === colName)[0]?.type;
|
||||
if (DataType.isInt(type) && type.bitWidth === 64) {
|
||||
// wrap in BigInt to avoid bug: https://github.com/apache/arrow/issues/40051
|
||||
values = values.map((v) => {
|
||||
if (v === null) {
|
||||
return v;
|
||||
const finalColumns: Record<string, Vector> = {};
|
||||
for (const field of inferredSchema.fields) {
|
||||
finalColumns[field.name] = transposeData(data, field);
|
||||
}
|
||||
if (typeof v === "bigint") {
|
||||
return v;
|
||||
}
|
||||
if (typeof v === "number") {
|
||||
return BigInt(v);
|
||||
|
||||
return new ArrowTable(inferredSchema, finalColumns);
|
||||
}
|
||||
|
||||
function inferSchema(
|
||||
data: Array<Record<string, unknown>>,
|
||||
schema: Schema | undefined,
|
||||
opts: MakeArrowTableOptions,
|
||||
): Schema {
|
||||
// We will collect all fields we see in the data.
|
||||
const pathTree = new PathTree<DataType>();
|
||||
|
||||
for (const [rowI, row] of data.entries()) {
|
||||
for (const [path, value] of rowPathsAndValues(row)) {
|
||||
if (!pathTree.has(path)) {
|
||||
// First time seeing this field.
|
||||
if (schema !== undefined) {
|
||||
const field = getFieldForPath(schema, path);
|
||||
if (field === undefined) {
|
||||
throw new Error(
|
||||
`Expected BigInt or number for column ${colName}, got ${typeof v}`,
|
||||
`Found field not in schema: ${path.join(".")} at row ${rowI}`,
|
||||
);
|
||||
} else {
|
||||
pathTree.set(path, field.type);
|
||||
}
|
||||
} else {
|
||||
const inferredType = inferType(value, path, opts);
|
||||
if (inferredType === undefined) {
|
||||
throw new Error(`Failed to infer data type for field ${path.join(".")} at row ${rowI}. \
|
||||
Consider providing an explicit schema.`);
|
||||
}
|
||||
pathTree.set(path, inferredType);
|
||||
}
|
||||
} else if (schema === undefined) {
|
||||
const currentType = pathTree.get(path);
|
||||
const newType = inferType(value, path, opts);
|
||||
if (currentType !== newType) {
|
||||
new Error(`Failed to infer schema for data. Previously inferred type \
|
||||
${currentType} but found ${newType} at row ${rowI}. Consider \
|
||||
providing an explicit schema.`);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (schema === undefined) {
|
||||
function fieldsFromPathTree(pathTree: PathTree<DataType>): Field[] {
|
||||
const fields = [];
|
||||
for (const [name, value] of pathTree.map.entries()) {
|
||||
if (value instanceof PathTree) {
|
||||
const children = fieldsFromPathTree(value);
|
||||
fields.push(new Field(name, new Struct(children), true));
|
||||
} else {
|
||||
fields.push(new Field(name, value, true));
|
||||
}
|
||||
}
|
||||
return fields;
|
||||
}
|
||||
const fields = fieldsFromPathTree(pathTree);
|
||||
return new Schema(fields);
|
||||
} else {
|
||||
function takeMatchingFields(
|
||||
fields: Field[],
|
||||
pathTree: PathTree<DataType>,
|
||||
): Field[] {
|
||||
const outFields = [];
|
||||
for (const field of fields) {
|
||||
if (pathTree.map.has(field.name)) {
|
||||
const value = pathTree.get([field.name]);
|
||||
if (value instanceof PathTree) {
|
||||
const struct = field.type as Struct;
|
||||
const children = takeMatchingFields(struct.children, value);
|
||||
outFields.push(
|
||||
new Field(field.name, new Struct(children), field.nullable),
|
||||
);
|
||||
} else {
|
||||
outFields.push(
|
||||
new Field(field.name, value as DataType, field.nullable),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
return outFields;
|
||||
}
|
||||
const fields = takeMatchingFields(schema.fields, pathTree);
|
||||
return new Schema(fields);
|
||||
}
|
||||
}
|
||||
|
||||
function* rowPathsAndValues(
|
||||
row: Record<string, unknown>,
|
||||
basePath: string[] = [],
|
||||
): Generator<[string[], unknown]> {
|
||||
for (const [key, value] of Object.entries(row)) {
|
||||
if (isObject(value)) {
|
||||
yield* rowPathsAndValues(value, [...basePath, key]);
|
||||
} else {
|
||||
yield [[...basePath, key], value];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function isObject(value: unknown): value is Record<string, unknown> {
|
||||
return (
|
||||
typeof value === "object" &&
|
||||
value !== null &&
|
||||
!Array.isArray(value) &&
|
||||
!(value instanceof RegExp) &&
|
||||
!(value instanceof Date) &&
|
||||
!(value instanceof Set) &&
|
||||
!(value instanceof Map) &&
|
||||
!(value instanceof Buffer)
|
||||
);
|
||||
}
|
||||
|
||||
function getFieldForPath(schema: Schema, path: string[]): Field | undefined {
|
||||
let current: Field | Schema = schema;
|
||||
for (const key of path) {
|
||||
if (current instanceof Schema) {
|
||||
const field: Field | undefined = current.fields.find(
|
||||
(f) => f.name === key,
|
||||
);
|
||||
if (field === undefined) {
|
||||
return undefined;
|
||||
}
|
||||
current = field;
|
||||
} else if (current instanceof Field && DataType.isStruct(current.type)) {
|
||||
const struct: Struct = current.type;
|
||||
const field = struct.children.find((f) => f.name === key);
|
||||
if (field === undefined) {
|
||||
return undefined;
|
||||
}
|
||||
current = field;
|
||||
} else {
|
||||
return undefined;
|
||||
}
|
||||
}
|
||||
if (current instanceof Field) {
|
||||
return current;
|
||||
} else {
|
||||
return undefined;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Try to infer which Arrow type to use for a given value.
|
||||
*
|
||||
* May return undefined if the type cannot be inferred.
|
||||
*/
|
||||
function inferType(
|
||||
value: unknown,
|
||||
path: string[],
|
||||
opts: MakeArrowTableOptions,
|
||||
): DataType | undefined {
|
||||
if (typeof value === "bigint") {
|
||||
return new Int64();
|
||||
} else if (typeof value === "number") {
|
||||
// Even if it's an integer, it's safer to assume Float64. Users can
|
||||
// always provide an explicit schema or use BigInt if they mean integer.
|
||||
return new Float64();
|
||||
} else if (typeof value === "string") {
|
||||
if (opts.dictionaryEncodeStrings) {
|
||||
return new Dictionary(new Utf8(), new Int32());
|
||||
} else {
|
||||
return new Utf8();
|
||||
}
|
||||
} else if (typeof value === "boolean") {
|
||||
return new Bool();
|
||||
} else if (value instanceof Buffer) {
|
||||
return new Binary();
|
||||
} else if (Array.isArray(value)) {
|
||||
if (value.length === 0) {
|
||||
return undefined; // Without any values we can't infer the type
|
||||
}
|
||||
if (path.length === 1 && Object.hasOwn(opts.vectorColumns, path[0])) {
|
||||
const floatType = sanitizeType(opts.vectorColumns[path[0]].type);
|
||||
return new FixedSizeList(
|
||||
value.length,
|
||||
new Field("item", floatType, true),
|
||||
);
|
||||
}
|
||||
const valueType = inferType(value[0], path, opts);
|
||||
if (valueType === undefined) {
|
||||
return undefined;
|
||||
}
|
||||
// Try to automatically detect embedding columns.
|
||||
if (valueType instanceof Float && path[path.length - 1] === "vector") {
|
||||
// We default to Float32 for vectors.
|
||||
const child = new Field("item", new Float32(), true);
|
||||
return new FixedSizeList(value.length, child);
|
||||
} else {
|
||||
const child = new Field("item", valueType, true);
|
||||
return new List(child);
|
||||
}
|
||||
} else {
|
||||
// TODO: timestamp
|
||||
return undefined;
|
||||
}
|
||||
}
|
||||
|
||||
class PathTree<V> {
|
||||
map: Map<string, V | PathTree<V>>;
|
||||
|
||||
constructor(entries?: [string[], V][]) {
|
||||
this.map = new Map();
|
||||
if (entries !== undefined) {
|
||||
for (const [path, value] of entries) {
|
||||
this.set(path, value);
|
||||
}
|
||||
}
|
||||
}
|
||||
has(path: string[]): boolean {
|
||||
let ref: PathTree<V> = this;
|
||||
for (const part of path) {
|
||||
if (!(ref instanceof PathTree) || !ref.map.has(part)) {
|
||||
return false;
|
||||
}
|
||||
ref = ref.map.get(part) as PathTree<V>;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
get(path: string[]): V | undefined {
|
||||
let ref: PathTree<V> = this;
|
||||
for (const part of path) {
|
||||
if (!(ref instanceof PathTree) || !ref.map.has(part)) {
|
||||
return undefined;
|
||||
}
|
||||
ref = ref.map.get(part) as PathTree<V>;
|
||||
}
|
||||
return ref as V;
|
||||
}
|
||||
set(path: string[], value: V): void {
|
||||
let ref: PathTree<V> = this;
|
||||
for (const part of path.slice(0, path.length - 1)) {
|
||||
if (!ref.map.has(part)) {
|
||||
ref.map.set(part, new PathTree<V>());
|
||||
}
|
||||
ref = ref.map.get(part) as PathTree<V>;
|
||||
}
|
||||
ref.map.set(path[path.length - 1], value);
|
||||
}
|
||||
}
|
||||
|
||||
function transposeData(
|
||||
data: Record<string, unknown>[],
|
||||
field: Field,
|
||||
path: string[] = [],
|
||||
): Vector {
|
||||
if (field.type instanceof Struct) {
|
||||
const childFields = field.type.children;
|
||||
const childVectors = childFields.map((child) => {
|
||||
return transposeData(data, child, [...path, child.name]);
|
||||
});
|
||||
}
|
||||
const structData = makeData({
|
||||
type: field.type,
|
||||
children: childVectors as unknown as ArrowData<DataType>[],
|
||||
});
|
||||
return arrowMakeVector(structData);
|
||||
} else {
|
||||
// Otherwise, check to see if this column is one of the vector columns
|
||||
// defined by opt.vectorColumns and, if so, use the fixed size list type
|
||||
const vectorColumnOptions = opt.vectorColumns[colName];
|
||||
if (vectorColumnOptions !== undefined) {
|
||||
const firstNonNullValue = values.find((v) => v !== null);
|
||||
if (Array.isArray(firstNonNullValue)) {
|
||||
type = newVectorType(
|
||||
firstNonNullValue.length,
|
||||
vectorColumnOptions.type,
|
||||
);
|
||||
const valuesPath = [...path, field.name];
|
||||
const values = data.map((datum) => {
|
||||
let current: unknown = datum;
|
||||
for (const key of valuesPath) {
|
||||
if (isObject(current) && Object.hasOwn(current, key)) {
|
||||
current = current[key];
|
||||
} else {
|
||||
throw new Error(
|
||||
`Column ${colName} is expected to be a vector column but first non-null value is not an array. Could not determine size of vector column`,
|
||||
);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
return current;
|
||||
});
|
||||
return makeVector(values, field.type);
|
||||
}
|
||||
|
||||
try {
|
||||
// Convert an Array of JS values to an arrow vector
|
||||
columns[colName] = makeVector(values, type, opt.dictionaryEncodeStrings);
|
||||
} catch (error: unknown) {
|
||||
// eslint-disable-next-line @typescript-eslint/restrict-template-expressions
|
||||
throw Error(`Could not convert column "${colName}" to Arrow: ${error}`);
|
||||
}
|
||||
}
|
||||
|
||||
if (opt.schema != null) {
|
||||
// `new ArrowTable(columns)` infers a schema which may sometimes have
|
||||
// incorrect nullability (it assumes nullable=true always)
|
||||
//
|
||||
// `new ArrowTable(schema, columns)` will also fail because it will create a
|
||||
// batch with an inferred schema and then complain that the batch schema
|
||||
// does not match the provided schema.
|
||||
//
|
||||
// To work around this we first create a table with the wrong schema and
|
||||
// then patch the schema of the batches so we can use
|
||||
// `new ArrowTable(schema, batches)` which does not do any schema inference
|
||||
const firstTable = new ArrowTable(columns);
|
||||
const batchesFixed = firstTable.batches.map(
|
||||
(batch) => new RecordBatch(opt.schema as Schema, batch.data),
|
||||
);
|
||||
let schema: Schema;
|
||||
if (metadata !== undefined) {
|
||||
let schemaMetadata = opt.schema.metadata;
|
||||
if (schemaMetadata.size === 0) {
|
||||
schemaMetadata = metadata;
|
||||
} else {
|
||||
for (const [key, entry] of schemaMetadata.entries()) {
|
||||
schemaMetadata.set(key, entry);
|
||||
}
|
||||
}
|
||||
|
||||
schema = new Schema(opt.schema.fields as Field[], schemaMetadata);
|
||||
} else {
|
||||
schema = opt.schema as Schema;
|
||||
}
|
||||
return new ArrowTable(schema, batchesFixed);
|
||||
}
|
||||
const tbl = new ArrowTable(columns);
|
||||
if (metadata !== undefined) {
|
||||
// biome-ignore lint/suspicious/noExplicitAny: <explanation>
|
||||
(<any>tbl.schema).metadata = metadata;
|
||||
}
|
||||
return tbl;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -492,6 +689,31 @@ function makeVector(
|
||||
): Vector<any> {
|
||||
if (type !== undefined) {
|
||||
// No need for inference, let Arrow create it
|
||||
if (type instanceof Int) {
|
||||
if (DataType.isInt(type) && type.bitWidth === 64) {
|
||||
// wrap in BigInt to avoid bug: https://github.com/apache/arrow/issues/40051
|
||||
values = values.map((v) => {
|
||||
if (v === null) {
|
||||
return v;
|
||||
} else if (typeof v === "bigint") {
|
||||
return v;
|
||||
} else if (typeof v === "number") {
|
||||
return BigInt(v);
|
||||
} else {
|
||||
return v;
|
||||
}
|
||||
});
|
||||
} else {
|
||||
// Similarly, bigint isn't supported for 16 or 32-bit ints.
|
||||
values = values.map((v) => {
|
||||
if (typeof v == "bigint") {
|
||||
return Number(v);
|
||||
} else {
|
||||
return v;
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
return vectorFromArray(values, type);
|
||||
}
|
||||
if (values.length === 0) {
|
||||
@@ -903,7 +1125,7 @@ function validateSchemaEmbeddings(
|
||||
schema: Schema,
|
||||
data: Array<Record<string, unknown>>,
|
||||
embeddings: EmbeddingFunctionConfig | undefined,
|
||||
) {
|
||||
): Schema {
|
||||
const fields = [];
|
||||
const missingEmbeddingFields = [];
|
||||
|
||||
|
||||
@@ -1,10 +1,23 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
import { Data, Schema, SchemaLike, TableLike } from "./arrow";
|
||||
import { fromTableToBuffer, makeEmptyTable } from "./arrow";
|
||||
import {
|
||||
Data,
|
||||
Schema,
|
||||
SchemaLike,
|
||||
TableLike,
|
||||
fromTableToStreamBuffer,
|
||||
isArrowTable,
|
||||
makeArrowTable,
|
||||
} from "./arrow";
|
||||
import {
|
||||
Table as ArrowTable,
|
||||
fromTableToBuffer,
|
||||
makeEmptyTable,
|
||||
} from "./arrow";
|
||||
import { EmbeddingFunctionConfig, getRegistry } from "./embedding/registry";
|
||||
import { Connection as LanceDbConnection } from "./native";
|
||||
import { sanitizeTable } from "./sanitize";
|
||||
import { LocalTable, Table } from "./table";
|
||||
|
||||
export interface CreateTableOptions {
|
||||
@@ -39,6 +52,8 @@ export interface CreateTableOptions {
|
||||
*
|
||||
* The default is `stable`.
|
||||
* Set to "legacy" to use the old format.
|
||||
*
|
||||
* @deprecated Pass `new_table_data_storage_version` to storageOptions instead.
|
||||
*/
|
||||
dataStorageVersion?: string;
|
||||
|
||||
@@ -48,17 +63,11 @@ export interface CreateTableOptions {
|
||||
* turning this on will make the dataset unreadable for older versions
|
||||
* of LanceDB (prior to 0.10.0). To migrate an existing dataset, instead
|
||||
* use the {@link LocalTable#migrateManifestPathsV2} method.
|
||||
*
|
||||
* @deprecated Pass `new_table_enable_v2_manifest_paths` to storageOptions instead.
|
||||
*/
|
||||
enableV2ManifestPaths?: boolean;
|
||||
|
||||
/**
|
||||
* If true then data files will be written with the legacy format
|
||||
*
|
||||
* The default is false.
|
||||
*
|
||||
* Deprecated. Use data storage version instead.
|
||||
*/
|
||||
useLegacyFormat?: boolean;
|
||||
schema?: SchemaLike;
|
||||
embeddingFunction?: EmbeddingFunctionConfig;
|
||||
}
|
||||
@@ -116,6 +125,7 @@ export interface TableNamesOptions {
|
||||
*
|
||||
* Any created tables are independent and will continue to work even if
|
||||
* the underlying connection has been closed.
|
||||
* @hideconstructor
|
||||
*/
|
||||
export abstract class Connection {
|
||||
[Symbol.for("nodejs.util.inspect.custom")](): string {
|
||||
@@ -201,11 +211,18 @@ export abstract class Connection {
|
||||
* @param {string} name The name of the table to drop.
|
||||
*/
|
||||
abstract dropTable(name: string): Promise<void>;
|
||||
|
||||
/**
|
||||
* Drop all tables in the database.
|
||||
*/
|
||||
abstract dropAllTables(): Promise<void>;
|
||||
}
|
||||
|
||||
/** @hideconstructor */
|
||||
export class LocalConnection extends Connection {
|
||||
readonly inner: LanceDbConnection;
|
||||
|
||||
/** @hidden */
|
||||
constructor(inner: LanceDbConnection) {
|
||||
super();
|
||||
this.inner = inner;
|
||||
@@ -240,6 +257,28 @@ export class LocalConnection extends Connection {
|
||||
return new LocalTable(innerTable);
|
||||
}
|
||||
|
||||
private getStorageOptions(
|
||||
options?: Partial<CreateTableOptions>,
|
||||
): Record<string, string> | undefined {
|
||||
if (options?.dataStorageVersion !== undefined) {
|
||||
if (options.storageOptions === undefined) {
|
||||
options.storageOptions = {};
|
||||
}
|
||||
options.storageOptions["newTableDataStorageVersion"] =
|
||||
options.dataStorageVersion;
|
||||
}
|
||||
|
||||
if (options?.enableV2ManifestPaths !== undefined) {
|
||||
if (options.storageOptions === undefined) {
|
||||
options.storageOptions = {};
|
||||
}
|
||||
options.storageOptions["newTableEnableV2ManifestPaths"] =
|
||||
options.enableV2ManifestPaths ? "true" : "false";
|
||||
}
|
||||
|
||||
return cleanseStorageOptions(options?.storageOptions);
|
||||
}
|
||||
|
||||
async createTable(
|
||||
nameOrOptions:
|
||||
| string
|
||||
@@ -255,21 +294,15 @@ export class LocalConnection extends Connection {
|
||||
if (data === undefined) {
|
||||
throw new Error("data is required");
|
||||
}
|
||||
const { buf, mode } = await Table.parseTableData(data, options);
|
||||
let dataStorageVersion = "stable";
|
||||
if (options?.dataStorageVersion !== undefined) {
|
||||
dataStorageVersion = options.dataStorageVersion;
|
||||
} else if (options?.useLegacyFormat !== undefined) {
|
||||
dataStorageVersion = options.useLegacyFormat ? "legacy" : "stable";
|
||||
}
|
||||
const { buf, mode } = await parseTableData(data, options);
|
||||
|
||||
const storageOptions = this.getStorageOptions(options);
|
||||
|
||||
const innerTable = await this.inner.createTable(
|
||||
nameOrOptions,
|
||||
buf,
|
||||
mode,
|
||||
cleanseStorageOptions(options?.storageOptions),
|
||||
dataStorageVersion,
|
||||
options?.enableV2ManifestPaths,
|
||||
storageOptions,
|
||||
);
|
||||
|
||||
return new LocalTable(innerTable);
|
||||
@@ -293,22 +326,14 @@ export class LocalConnection extends Connection {
|
||||
metadata = registry.getTableMetadata([embeddingFunction]);
|
||||
}
|
||||
|
||||
let dataStorageVersion = "stable";
|
||||
if (options?.dataStorageVersion !== undefined) {
|
||||
dataStorageVersion = options.dataStorageVersion;
|
||||
} else if (options?.useLegacyFormat !== undefined) {
|
||||
dataStorageVersion = options.useLegacyFormat ? "legacy" : "stable";
|
||||
}
|
||||
|
||||
const storageOptions = this.getStorageOptions(options);
|
||||
const table = makeEmptyTable(schema, metadata);
|
||||
const buf = await fromTableToBuffer(table);
|
||||
const innerTable = await this.inner.createEmptyTable(
|
||||
name,
|
||||
buf,
|
||||
mode,
|
||||
cleanseStorageOptions(options?.storageOptions),
|
||||
dataStorageVersion,
|
||||
options?.enableV2ManifestPaths,
|
||||
storageOptions,
|
||||
);
|
||||
return new LocalTable(innerTable);
|
||||
}
|
||||
@@ -316,6 +341,10 @@ export class LocalConnection extends Connection {
|
||||
async dropTable(name: string): Promise<void> {
|
||||
return this.inner.dropTable(name);
|
||||
}
|
||||
|
||||
async dropAllTables(): Promise<void> {
|
||||
return this.inner.dropAllTables();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -357,3 +386,38 @@ function camelToSnakeCase(camel: string): string {
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
async function parseTableData(
|
||||
data: Record<string, unknown>[] | TableLike,
|
||||
options?: Partial<CreateTableOptions>,
|
||||
streaming = false,
|
||||
) {
|
||||
let mode: string = options?.mode ?? "create";
|
||||
const existOk = options?.existOk ?? false;
|
||||
|
||||
if (mode === "create" && existOk) {
|
||||
mode = "exist_ok";
|
||||
}
|
||||
|
||||
let table: ArrowTable;
|
||||
if (isArrowTable(data)) {
|
||||
table = sanitizeTable(data);
|
||||
} else {
|
||||
table = makeArrowTable(data as Record<string, unknown>[], options);
|
||||
}
|
||||
if (streaming) {
|
||||
const buf = await fromTableToStreamBuffer(
|
||||
table,
|
||||
options?.embeddingFunction,
|
||||
options?.schema,
|
||||
);
|
||||
return { buf, mode };
|
||||
} else {
|
||||
const buf = await fromTableToBuffer(
|
||||
table,
|
||||
options?.embeddingFunction,
|
||||
options?.schema,
|
||||
);
|
||||
return { buf, mode };
|
||||
}
|
||||
}
|
||||
|
||||
@@ -15,6 +15,7 @@ import {
|
||||
newVectorType,
|
||||
} from "../arrow";
|
||||
import { sanitizeType } from "../sanitize";
|
||||
import { getRegistry } from "./registry";
|
||||
|
||||
/**
|
||||
* Options for a given embedding function
|
||||
@@ -32,6 +33,22 @@ export interface EmbeddingFunctionConstructor<
|
||||
|
||||
/**
|
||||
* An embedding function that automatically creates vector representation for a given column.
|
||||
*
|
||||
* It's important subclasses pass the **original** options to the super constructor
|
||||
* and then pass those options to `resolveVariables` to resolve any variables before
|
||||
* using them.
|
||||
*
|
||||
* @example
|
||||
* ```ts
|
||||
* class MyEmbeddingFunction extends EmbeddingFunction {
|
||||
* constructor(options: {model: string, timeout: number}) {
|
||||
* super(optionsRaw);
|
||||
* const options = this.resolveVariables(optionsRaw);
|
||||
* this.model = options.model;
|
||||
* this.timeout = options.timeout;
|
||||
* }
|
||||
* }
|
||||
* ```
|
||||
*/
|
||||
export abstract class EmbeddingFunction<
|
||||
// biome-ignore lint/suspicious/noExplicitAny: we don't know what the implementor will do
|
||||
@@ -44,33 +61,74 @@ export abstract class EmbeddingFunction<
|
||||
*/
|
||||
// biome-ignore lint/style/useNamingConvention: we want to keep the name as it is
|
||||
readonly TOptions!: M;
|
||||
/**
|
||||
* Convert the embedding function to a JSON object
|
||||
* It is used to serialize the embedding function to the schema
|
||||
* It's important that any object returned by this method contains all the necessary
|
||||
* information to recreate the embedding function
|
||||
*
|
||||
* It should return the same object that was passed to the constructor
|
||||
* If it does not, the embedding function will not be able to be recreated, or could be recreated incorrectly
|
||||
*
|
||||
* @example
|
||||
* ```ts
|
||||
* class MyEmbeddingFunction extends EmbeddingFunction {
|
||||
* constructor(options: {model: string, timeout: number}) {
|
||||
* super();
|
||||
* this.model = options.model;
|
||||
* this.timeout = options.timeout;
|
||||
* }
|
||||
* toJSON() {
|
||||
* return {
|
||||
* model: this.model,
|
||||
* timeout: this.timeout,
|
||||
* };
|
||||
* }
|
||||
* ```
|
||||
*/
|
||||
abstract toJSON(): Partial<M>;
|
||||
|
||||
#config: Partial<M>;
|
||||
|
||||
/**
|
||||
* Get the original arguments to the constructor, to serialize them so they
|
||||
* can be used to recreate the embedding function later.
|
||||
*/
|
||||
// biome-ignore lint/suspicious/noExplicitAny :
|
||||
toJSON(): Record<string, any> {
|
||||
return JSON.parse(JSON.stringify(this.#config));
|
||||
}
|
||||
|
||||
constructor() {
|
||||
this.#config = {};
|
||||
}
|
||||
|
||||
/**
|
||||
* Provide a list of keys in the function options that should be treated as
|
||||
* sensitive. If users pass raw values for these keys, they will be rejected.
|
||||
*/
|
||||
protected getSensitiveKeys(): string[] {
|
||||
return [];
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply variables to the config.
|
||||
*/
|
||||
protected resolveVariables(config: Partial<M>): Partial<M> {
|
||||
this.#config = config;
|
||||
const registry = getRegistry();
|
||||
const newConfig = { ...config };
|
||||
for (const [key_, value] of Object.entries(newConfig)) {
|
||||
if (
|
||||
this.getSensitiveKeys().includes(key_) &&
|
||||
!value.startsWith("$var:")
|
||||
) {
|
||||
throw new Error(
|
||||
`The key "${key_}" is sensitive and cannot be set directly. Please use the $var: syntax to set it.`,
|
||||
);
|
||||
}
|
||||
// Makes TS happy (https://stackoverflow.com/a/78391854)
|
||||
const key = key_ as keyof M;
|
||||
if (typeof value === "string" && value.startsWith("$var:")) {
|
||||
const [name, defaultValue] = value.slice(5).split(":", 2);
|
||||
const variableValue = registry.getVar(name);
|
||||
if (!variableValue) {
|
||||
if (defaultValue) {
|
||||
// biome-ignore lint/suspicious/noExplicitAny:
|
||||
newConfig[key] = defaultValue as any;
|
||||
} else {
|
||||
throw new Error(`Variable "${name}" not found`);
|
||||
}
|
||||
} else {
|
||||
// biome-ignore lint/suspicious/noExplicitAny:
|
||||
newConfig[key] = variableValue as any;
|
||||
}
|
||||
}
|
||||
}
|
||||
return newConfig;
|
||||
}
|
||||
|
||||
/**
|
||||
* Optionally load any resources needed for the embedding function.
|
||||
*
|
||||
* This method is called after the embedding function has been initialized
|
||||
* but before any embeddings are computed. It is useful for loading local models
|
||||
* or other resources that are needed for the embedding function to work.
|
||||
*/
|
||||
async init?(): Promise<void>;
|
||||
|
||||
/**
|
||||
@@ -78,7 +136,7 @@ export abstract class EmbeddingFunction<
|
||||
*
|
||||
* @param optionsOrDatatype - The options for the field or the datatype
|
||||
*
|
||||
* @see {@link lancedb.LanceSchema}
|
||||
* @see {@link LanceSchema}
|
||||
*/
|
||||
sourceField(
|
||||
optionsOrDatatype: Partial<FieldOptions> | DataType,
|
||||
@@ -100,9 +158,9 @@ export abstract class EmbeddingFunction<
|
||||
/**
|
||||
* vectorField is used in combination with `LanceSchema` to provide a declarative data model
|
||||
*
|
||||
* @param options - The options for the field
|
||||
* @param optionsOrDatatype - The options for the field
|
||||
*
|
||||
* @see {@link lancedb.LanceSchema}
|
||||
* @see {@link LanceSchema}
|
||||
*/
|
||||
vectorField(
|
||||
optionsOrDatatype?: Partial<FieldOptions> | DataType,
|
||||
|
||||
@@ -6,7 +6,13 @@ import { sanitizeType } from "../sanitize";
|
||||
import { EmbeddingFunction } from "./embedding_function";
|
||||
import { EmbeddingFunctionConfig, getRegistry } from "./registry";
|
||||
|
||||
export { EmbeddingFunction, TextEmbeddingFunction } from "./embedding_function";
|
||||
export {
|
||||
FieldOptions,
|
||||
EmbeddingFunction,
|
||||
TextEmbeddingFunction,
|
||||
FunctionOptions,
|
||||
EmbeddingFunctionConstructor,
|
||||
} from "./embedding_function";
|
||||
|
||||
export * from "./registry";
|
||||
|
||||
|
||||
@@ -21,11 +21,13 @@ export class OpenAIEmbeddingFunction extends EmbeddingFunction<
|
||||
#modelName: OpenAIOptions["model"];
|
||||
|
||||
constructor(
|
||||
options: Partial<OpenAIOptions> = {
|
||||
optionsRaw: Partial<OpenAIOptions> = {
|
||||
model: "text-embedding-ada-002",
|
||||
},
|
||||
) {
|
||||
super();
|
||||
const options = this.resolveVariables(optionsRaw);
|
||||
|
||||
const openAIKey = options?.apiKey ?? process.env.OPENAI_API_KEY;
|
||||
if (!openAIKey) {
|
||||
throw new Error("OpenAI API key is required");
|
||||
@@ -52,10 +54,8 @@ export class OpenAIEmbeddingFunction extends EmbeddingFunction<
|
||||
this.#modelName = modelName;
|
||||
}
|
||||
|
||||
toJSON() {
|
||||
return {
|
||||
model: this.#modelName,
|
||||
};
|
||||
protected getSensitiveKeys(): string[] {
|
||||
return ["apiKey"];
|
||||
}
|
||||
|
||||
ndims(): number {
|
||||
|
||||
@@ -7,11 +7,11 @@ import {
|
||||
} from "./embedding_function";
|
||||
import "reflect-metadata";
|
||||
|
||||
type CreateReturnType<T> = T extends { init: () => Promise<void> }
|
||||
export type CreateReturnType<T> = T extends { init: () => Promise<void> }
|
||||
? Promise<T>
|
||||
: T;
|
||||
|
||||
interface EmbeddingFunctionCreate<T extends EmbeddingFunction> {
|
||||
export interface EmbeddingFunctionCreate<T extends EmbeddingFunction> {
|
||||
create(options?: T["TOptions"]): CreateReturnType<T>;
|
||||
}
|
||||
|
||||
@@ -23,6 +23,7 @@ interface EmbeddingFunctionCreate<T extends EmbeddingFunction> {
|
||||
*/
|
||||
export class EmbeddingFunctionRegistry {
|
||||
#functions = new Map<string, EmbeddingFunctionConstructor>();
|
||||
#variables = new Map<string, string>();
|
||||
|
||||
/**
|
||||
* Get the number of registered functions
|
||||
@@ -33,8 +34,6 @@ export class EmbeddingFunctionRegistry {
|
||||
|
||||
/**
|
||||
* Register an embedding function
|
||||
* @param name The name of the function
|
||||
* @param func The function to register
|
||||
* @throws Error if the function is already registered
|
||||
*/
|
||||
register<
|
||||
@@ -84,10 +83,7 @@ export class EmbeddingFunctionRegistry {
|
||||
};
|
||||
} else {
|
||||
// biome-ignore lint/suspicious/noExplicitAny: <explanation>
|
||||
create = function (options?: any) {
|
||||
const instance = new factory(options);
|
||||
return instance;
|
||||
};
|
||||
create = (options?: any) => new factory(options);
|
||||
}
|
||||
|
||||
return {
|
||||
@@ -166,6 +162,37 @@ export class EmbeddingFunctionRegistry {
|
||||
|
||||
return metadata;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set a variable. These can be accessed in the embedding function
|
||||
* configuration using the syntax `$var:variable_name`. If they are not
|
||||
* set, an error will be thrown letting you know which key is unset. If you
|
||||
* want to supply a default value, you can add an additional part in the
|
||||
* configuration like so: `$var:variable_name:default_value`. Default values
|
||||
* can be used for runtime configurations that are not sensitive, such as
|
||||
* whether to use a GPU for inference.
|
||||
*
|
||||
* The name must not contain colons. The default value can contain colons.
|
||||
*
|
||||
* @param name
|
||||
* @param value
|
||||
*/
|
||||
setVar(name: string, value: string): void {
|
||||
if (name.includes(":")) {
|
||||
throw new Error("Variable names cannot contain colons");
|
||||
}
|
||||
this.#variables.set(name, value);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a variable.
|
||||
* @param name
|
||||
* @returns
|
||||
* @see {@link setVar}
|
||||
*/
|
||||
getVar(name: string): string | undefined {
|
||||
return this.#variables.get(name);
|
||||
}
|
||||
}
|
||||
|
||||
const _REGISTRY = new EmbeddingFunctionRegistry();
|
||||
|
||||
@@ -44,11 +44,12 @@ export class TransformersEmbeddingFunction extends EmbeddingFunction<
|
||||
#ndims?: number;
|
||||
|
||||
constructor(
|
||||
options: Partial<XenovaTransformerOptions> = {
|
||||
optionsRaw: Partial<XenovaTransformerOptions> = {
|
||||
model: "Xenova/all-MiniLM-L6-v2",
|
||||
},
|
||||
) {
|
||||
super();
|
||||
const options = this.resolveVariables(optionsRaw);
|
||||
|
||||
const modelName = options?.model ?? "Xenova/all-MiniLM-L6-v2";
|
||||
this.#tokenizerOptions = {
|
||||
@@ -59,22 +60,6 @@ export class TransformersEmbeddingFunction extends EmbeddingFunction<
|
||||
this.#ndims = options.ndims;
|
||||
this.#modelName = modelName;
|
||||
}
|
||||
toJSON() {
|
||||
// biome-ignore lint/suspicious/noExplicitAny: <explanation>
|
||||
const obj: Record<string, any> = {
|
||||
model: this.#modelName,
|
||||
};
|
||||
if (this.#ndims) {
|
||||
obj["ndims"] = this.#ndims;
|
||||
}
|
||||
if (this.#tokenizerOptions) {
|
||||
obj["tokenizerOptions"] = this.#tokenizerOptions;
|
||||
}
|
||||
if (this.#tokenizer) {
|
||||
obj["tokenizer"] = this.#tokenizer.name;
|
||||
}
|
||||
return obj;
|
||||
}
|
||||
|
||||
async init() {
|
||||
let transformers;
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user