Mirror of https://github.com/lancedb/lancedb.git, synced 2026-01-06 11:52:57 +00:00.

Compare commits: v0.22.1-be ... v0.21.3 (1 commit)

Commit ec1fca4c6c:
Bump-version config (`[tool.bumpversion]`):

```diff
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.22.1-beta.0"
+current_version = "0.21.3"
 parse = """(?x)
 (?P<major>0|[1-9]\\d*)\\.
 (?P<minor>0|[1-9]\\d*)\\.
```
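The hunk shows only the opening lines of the `parse` regex. As a rough illustration of how such a bump-my-version-style pattern classifies the two version strings seen in this compare, here is a Python sketch; the patch and pre-release parts are assumed completions (the hunk truncates the regex), and note that the backslashes are doubled in TOML but single in a Python raw string.

```python
import re

# Major/minor lines mirror the hunk above; the patch and pre-release parts are
# ASSUMED completions in the usual semver style, since the hunk cuts off here.
PARSE = re.compile(r"""(?x)
    (?P<major>0|[1-9]\d*)\.
    (?P<minor>0|[1-9]\d*)\.
    (?P<patch>0|[1-9]\d*)                                       # assumed
    (?:-(?P<pre_label>[a-zA-Z]+)\.(?P<pre_number>0|[1-9]\d*))?  # assumed
""")

for version in ("0.22.1-beta.0", "0.21.3"):
    print(version, "->", PARSE.match(version).groupdict())
# 0.22.1-beta.0 -> {'major': '0', 'minor': '22', 'patch': '1', 'pre_label': 'beta', 'pre_number': '0'}
# 0.21.3 -> {'major': '0', 'minor': '21', 'patch': '3', 'pre_label': None, 'pre_number': None}
```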
CLAUDE.md (66 changed lines):
````diff
@@ -13,68 +13,10 @@ Project layout:
 
 Common commands:
 
-* Check for compiler errors: `cargo check --quiet --features remote --tests --examples`
-* Run tests: `cargo test --quiet --features remote --tests`
-* Run specific test: `cargo test --quiet --features remote -p <package_name> --test <test_name>`
-* Lint: `cargo clippy --quiet --features remote --tests --examples`
+* Check for compiler errors: `cargo check --features remote --tests --examples`
+* Run tests: `cargo test --features remote --tests`
+* Run specific test: `cargo test --features remote -p <package_name> --test <test_name>`
+* Lint: `cargo clippy --features remote --tests --examples`
 * Format: `cargo fmt --all`
 
 Before committing changes, run formatting.
 
-## Coding tips
-
-* When writing Rust doctests for things that require a connection or table reference,
-  write them as a function instead of a fully executable test. This allows type checking
-  to run but avoids needing a full test environment. For example:
-
-  ```rust
-  /// ```
-  /// use lance_index::scalar::FullTextSearchQuery;
-  /// use lancedb::query::{QueryBase, ExecutableQuery};
-  ///
-  /// # use lancedb::Table;
-  /// # async fn query(table: &Table) -> Result<(), Box<dyn std::error::Error>> {
-  /// let results = table.query()
-  ///     .full_text_search(FullTextSearchQuery::new("hello world".into()))
-  ///     .execute()
-  ///     .await?;
-  /// # Ok(())
-  /// # }
-  /// ```
-  ```
-
-## Example plan: adding a new method on Table
-
-Adding a new method involves first adding it to the Rust core, then exposing it
-in the Python and TypeScript bindings. There are both local and remote tables.
-Remote tables are implemented via a HTTP API and require the `remote` cargo
-feature flag to be enabled. Python has both sync and async methods.
-
-Rust core changes:
-
-1. Add method on `Table` struct in `rust/lancedb/src/table.rs` (calls `BaseTable` trait).
-2. Add method to `BaseTable` trait in `rust/lancedb/src/table.rs`.
-3. Implement new trait method on `NativeTable` in `rust/lancedb/src/table.rs`.
-   * Test with unit test in `rust/lancedb/src/table.rs`.
-4. Implement new trait method on `RemoteTable` in `rust/lancedb/src/remote/table.rs`.
-   * Test with unit test in `rust/lancedb/src/remote/table.rs` against mocked endpoint.
-
-Python bindings changes:
-
-1. Add PyO3 method binding in `python/src/table.rs`. Run `make develop` to compile bindings.
-2. Add types for PyO3 method in `python/python/lancedb/_lancedb.pyi`.
-3. Add method to `AsyncTable` class in `python/python/lancedb/table.py`.
-4. Add abstract method to `Table` abstract base class in `python/python/lancedb/table.py`.
-5. Add concrete sync method to `LanceTable` class in `python/python/lancedb/table.py`.
-   * Should use `LOOP.run()` to call the corresponding `AsyncTable` method.
-6. Add concrete sync method to `RemoteTable` class in `python/python/lancedb/remote/table.py`.
-7. Add unit test in `python/tests/test_table.py`.
-
-TypeScript bindings changes:
-
-1. Add napi-rs method binding on `Table` in `nodejs/src/table.rs`.
-2. Run `npm run build` to generate TypeScript definitions.
-3. Add typescript method on abstract class `Table` in `nodejs/src/table.ts`.
-4. Add concrete method on `LocalTable` class in `nodejs/src/native_table.ts`.
-   * Note: despite the name, this class is also used for remote tables.
-5. Add test in `nodejs/__test__/table.test.ts`.
-6. Run `npm run docs` to generate TypeScript documentation.
````
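The removed plan's step 5 for the Python bindings (use `LOOP.run()` to call the corresponding `AsyncTable` method) describes a sync-over-async facade. Below is a minimal sketch of that pattern with stand-in classes; the real `LOOP`, `AsyncTable`, and `LanceTable` in the lancedb package are more involved, so treat this as illustration only.

```python
import asyncio


class _LoopRunner:
    """Stand-in for lancedb's LOOP helper; drives coroutines from sync code."""

    def __init__(self) -> None:
        self._loop = asyncio.new_event_loop()

    def run(self, coro):
        return self._loop.run_until_complete(coro)


LOOP = _LoopRunner()


class AsyncTable:
    async def count_rows(self) -> int:  # hypothetical method being exposed
        return 42


class LanceTable:
    """Sync facade: each method delegates to the AsyncTable implementation."""

    def __init__(self, inner: AsyncTable) -> None:
        self._inner = inner

    def count_rows(self) -> int:
        return LOOP.run(self._inner.count_rows())


print(LanceTable(AsyncTable()).count_rows())  # -> 42
```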
Cargo.lock (generated, 1759 changed lines): file diff suppressed because it is too large.
Cargo.toml (28 changed lines):
```diff
@@ -15,14 +15,14 @@ categories = ["database-implementations"]
 rust-version = "1.78.0"
 
 [workspace.dependencies]
-lance = { "version" = "=0.35.0", default-features = false, "features" = ["dynamodb"], "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
-lance-io = { "version" = "=0.35.0", default-features = false, "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
-lance-index = { "version" = "=0.35.0", "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
-lance-linalg = { "version" = "=0.35.0", "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
-lance-table = { "version" = "=0.35.0", "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
-lance-testing = { "version" = "=0.35.0", "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
-lance-datafusion = { "version" = "=0.35.0", "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
-lance-encoding = { "version" = "=0.35.0", "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
+lance = { "version" = "=0.33.0", "features" = ["dynamodb"] }
+lance-io = "=0.33.0"
+lance-index = "=0.33.0"
+lance-linalg = "=0.33.0"
+lance-table = "=0.33.0"
+lance-testing = "=0.33.0"
+lance-datafusion = "=0.33.0"
+lance-encoding = "=0.33.0"
 # Note that this one does not include pyarrow
 arrow = { version = "55.1", optional = false }
 arrow-array = "55.1"
@@ -33,12 +33,12 @@ arrow-schema = "55.1"
 arrow-arith = "55.1"
 arrow-cast = "55.1"
 async-trait = "0"
-datafusion = { version = "49.0", default-features = false }
-datafusion-catalog = "49.0"
-datafusion-common = { version = "49.0", default-features = false }
-datafusion-execution = "49.0"
-datafusion-expr = "49.0"
-datafusion-physical-plan = "49.0"
+datafusion = { version = "48.0", default-features = false }
+datafusion-catalog = "48.0"
+datafusion-common = { version = "48.0", default-features = false }
+datafusion-execution = "48.0"
+datafusion-expr = "48.0"
+datafusion-physical-plan = "48.0"
 env_logger = "0.11"
 half = { "version" = "2.6.0", default-features = false, features = [
     "num-traits",
```
Python version-bump script:

```diff
@@ -54,52 +54,6 @@ def extract_features(line: str) -> list:
     return []
 
 
-def extract_default_features(line: str) -> bool:
-    """
-    Checks if default-features = false is present in a line in Cargo.toml.
-    Example: 'lance = { "version" = "=0.29.0", default-features = false, "features" = ["dynamodb"] }'
-    Returns: True if default-features = false is present, False otherwise
-    """
-    import re
-
-    match = re.search(r'default-features\s*=\s*false', line)
-    return match is not None
-
-
-def dict_to_toml_line(package_name: str, config: dict) -> str:
-    """
-    Converts a configuration dictionary to a TOML dependency line.
-    Dictionary insertion order is preserved (Python 3.7+), so the caller
-    controls the order of fields in the output.
-
-    Args:
-        package_name: The name of the package (e.g., "lance", "lance-io")
-        config: Dictionary with keys like "version", "path", "git", "tag", "features", "default-features"
-            The order of keys in this dict determines the order in the output.
-
-    Returns:
-        A properly formatted TOML line with a trailing newline
-    """
-    # If only version is specified, use simple format
-    if len(config) == 1 and "version" in config:
-        return f'{package_name} = "{config["version"]}"\n'
-
-    # Otherwise, use inline table format
-    parts = []
-    for key, value in config.items():
-        if key == "default-features" and not value:
-            parts.append("default-features = false")
-        elif key == "features":
-            parts.append(f'"features" = {json.dumps(value)}')
-        elif isinstance(value, str):
-            parts.append(f'"{key}" = "{value}"')
-        else:
-            # This shouldn't happen with our current usage
-            parts.append(f'"{key}" = {json.dumps(value)}')
-
-    return f'{package_name} = {{ {", ".join(parts)} }}\n'
-
-
 def update_cargo_toml(line_updater):
     """
     Updates the Cargo.toml file by applying the line_updater function to each line.
```
```diff
@@ -113,27 +67,20 @@ def update_cargo_toml(line_updater):
     is_parsing_lance_line = False
     for line in lines:
         if line.startswith("lance"):
-            # Check if this is a single-line or multi-line entry
-            # Single-line entries either:
-            # 1. End with } (complete inline table)
-            # 2. End with " (simple version string)
-            # Multi-line entries start with { but don't end with }
-            if line.strip().endswith("}") or line.strip().endswith('"'):
-                # Single-line entry - process immediately
+            # Update the line using the provided function
+            if line.strip().endswith("}"):
                 new_lines.append(line_updater(line))
-            elif "{" in line and not line.strip().endswith("}"):
-                # Multi-line entry - start accumulating
+            else:
                 lance_line = line
                 is_parsing_lance_line = True
-            else:
-                # Single-line entry without quotes or braces (shouldn't happen but handle it)
-                new_lines.append(line_updater(line))
         elif is_parsing_lance_line:
             lance_line += line
             if line.strip().endswith("}"):
                 new_lines.append(line_updater(lance_line))
                 lance_line = ""
                 is_parsing_lance_line = False
+            else:
+                print("doesn't end with }:", line)
         else:
             # Keep the line unchanged
             new_lines.append(line)
```
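The comments deleted on the left-hand side spell out how the updater classifies a dependency line before rewriting it. A self-contained illustration of that classification, using line shapes visible in the Cargo.toml hunks earlier in this diff:

```python
def classify(line: str) -> str:
    # Mirrors the removed comments: single-line entries end with "}" or '"',
    # multi-line entries open an inline table but do not close it.
    stripped = line.strip()
    if stripped.endswith("}") or stripped.endswith('"'):
        return "single-line entry -> update immediately"
    if "{" in stripped:
        return "multi-line entry -> start accumulating"
    return "neither (shouldn't happen) -> update as-is"


print(classify('lance-io = "=0.33.0"'))
print(classify('lance = { "version" = "=0.33.0", "features" = ["dynamodb"] }'))
print(classify('half = { "version" = "2.6.0", default-features = false, features = ['))
```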
```diff
@@ -145,25 +92,18 @@ def update_cargo_toml(line_updater):
 def set_stable_version(version: str):
     """
     Sets lines to
-    lance = { "version" = "=0.29.0", default-features = false, "features" = ["dynamodb"] }
-    lance-io = { "version" = "=0.29.0", default-features = false }
+    lance = { "version" = "=0.29.0", "features" = ["dynamodb"] }
+    lance-io = "=0.29.0"
     ...
     """
 
     def line_updater(line: str) -> str:
         package_name = line.split("=", maxsplit=1)[0].strip()
-
-        # Build config in desired order: version, default-features, features
-        config = {"version": f"={version}"}
-
-        if extract_default_features(line):
-            config["default-features"] = False
-
         features = extract_features(line)
         if features:
-            config["features"] = features
-
-        return dict_to_toml_line(package_name, config)
+            return f'{package_name} = {{ "version" = "={version}", "features" = {json.dumps(features)} }}\n'
+        else:
+            return f'{package_name} = "={version}"\n'
 
     update_cargo_toml(line_updater)
```
```diff
@@ -171,29 +111,19 @@ def set_stable_version(version: str):
 def set_preview_version(version: str):
     """
     Sets lines to
-    lance = { "version" = "=0.29.0", default-features = false, "features" = ["dynamodb"], "tag" = "v0.29.0-beta.2", "git" = "https://github.com/lancedb/lance.git" }
-    lance-io = { "version" = "=0.29.0", default-features = false, "tag" = "v0.29.0-beta.2", "git" = "https://github.com/lancedb/lance.git" }
+    lance = { "version" = "=0.29.0", "features" = ["dynamodb"], tag = "v0.29.0-beta.2", git="https://github.com/lancedb/lance.git" }
+    lance-io = { version = "=0.29.0", tag = "v0.29.0-beta.2", git="https://github.com/lancedb/lance.git" }
     ...
     """
 
     def line_updater(line: str) -> str:
         package_name = line.split("=", maxsplit=1)[0].strip()
-        base_version = version.split("-")[0]  # Get the base version without beta suffix
-
-        # Build config in desired order: version, default-features, features, tag, git
-        config = {"version": f"={base_version}"}
-
-        if extract_default_features(line):
-            config["default-features"] = False
-
         features = extract_features(line)
+        base_version = version.split("-")[0]  # Get the base version without beta suffix
         if features:
-            config["features"] = features
-
-        config["tag"] = f"v{version}"
-        config["git"] = "https://github.com/lancedb/lance.git"
-
-        return dict_to_toml_line(package_name, config)
+            return f'{package_name} = {{ "version" = "={base_version}", "features" = {json.dumps(features)}, "tag" = "v{version}", "git" = "https://github.com/lancedb/lance.git" }}\n'
+        else:
+            return f'{package_name} = {{ "version" = "={base_version}", "tag" = "v{version}", "git" = "https://github.com/lancedb/lance.git" }}\n'
 
     update_cargo_toml(line_updater)
```
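Concretely, for the preview pin visible in the workspace Cargo.toml hunk earlier in this diff, the split on `-` separates the crates.io version from the git tag:

```python
version = "0.35.0-beta.4"             # preview version passed to set_preview_version
base_version = version.split("-")[0]  # "0.35.0" -> used for the "version" pin
print(f'"version" = "={base_version}", "tag" = "v{version}"')
# "version" = "=0.35.0", "tag" = "v0.35.0-beta.4"
```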
```diff
@@ -201,25 +131,18 @@ def set_preview_version(version: str):
 def set_local_version():
     """
     Sets lines to
-    lance = { "path" = "../lance/rust/lance", default-features = false, "features" = ["dynamodb"] }
-    lance-io = { "path" = "../lance/rust/lance-io", default-features = false }
+    lance = { path = "../lance/rust/lance", features = ["dynamodb"] }
+    lance-io = { path = "../lance/rust/lance-io" }
     ...
     """
 
     def line_updater(line: str) -> str:
         package_name = line.split("=", maxsplit=1)[0].strip()
-
-        # Build config in desired order: path, default-features, features
-        config = {"path": f"../lance/rust/{package_name}"}
-
-        if extract_default_features(line):
-            config["default-features"] = False
-
         features = extract_features(line)
         if features:
-            config["features"] = features
-
-        return dict_to_toml_line(package_name, config)
+            return f'{package_name} = {{ "path" = "../lance/rust/{package_name}", "features" = {json.dumps(features)} }}\n'
+        else:
+            return f'{package_name} = {{ "path" = "../lance/rust/{package_name}" }}\n'
 
     update_cargo_toml(line_updater)
```
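For reference, here is a condensed, self-contained copy of the `dict_to_toml_line` helper removed earlier in this diff, showing the two output shapes the setters above rely on; the version numbers are the ones visible in this compare.

```python
import json


def dict_to_toml_line(package_name: str, config: dict) -> str:
    # Condensed from the removed helper: simple string when only a version is
    # given, inline-table format otherwise, preserving dict insertion order.
    if len(config) == 1 and "version" in config:
        return f'{package_name} = "{config["version"]}"\n'
    parts = []
    for key, value in config.items():
        if key == "default-features" and not value:
            parts.append("default-features = false")
        elif key == "features":
            parts.append(f'"features" = {json.dumps(value)}')
        else:
            parts.append(f'"{key}" = "{value}"')
    return f'{package_name} = {{ {", ".join(parts)} }}\n'


# Stable pin: version only.
print(dict_to_toml_line("lance-io", {"version": "=0.33.0"}), end="")
# lance-io = "=0.33.0"

# Preview pin: version plus git tag, as set_preview_version assembles it.
print(
    dict_to_toml_line(
        "lance-io",
        {
            "version": "=0.35.0",
            "default-features": False,
            "tag": "v0.35.0-beta.4",
            "git": "https://github.com/lancedb/lance.git",
        },
    ),
    end="",
)
# lance-io = { "version" = "=0.35.0", default-features = false, "tag" = "v0.35.0-beta.4", "git" = "https://github.com/lancedb/lance.git" }
```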
TypeScript API reference docs:

````diff
@@ -26,18 +26,6 @@ will be used to determine the most useful kind of index to create.
 
 ***
 
-### name?
-
-```ts
-optional name: string;
-```
-
-Optional custom name for the index.
-
-If not provided, a default name will be generated based on the column name.
-
-***
-
 ### replace?
 
 ```ts
@@ -54,27 +42,8 @@ The default is true
 
 ***
 
-### train?
-
-```ts
-optional train: boolean;
-```
-
-Whether to train the index with existing data.
-
-If true (default), the index will be trained with existing data in the table.
-If false, the index will be created empty and populated as new data is added.
-
-Note: This option is only supported for scalar indices. Vector indices always train.
-
-***
-
 ### waitTimeoutSeconds?
 
 ```ts
 optional waitTimeoutSeconds: number;
 ```
 
-Timeout in seconds to wait for index creation to complete.
-
-If not specified, the method will return immediately after starting the index creation.
````
Native bindings crate manifest (cdylib):

```diff
@@ -15,7 +15,7 @@ publish = false
 crate-type = ["cdylib"]
 
 [dependencies]
-lancedb = { path = "../../../rust/lancedb", default-features = false }
+lancedb = { path = "../../../rust/lancedb" }
 lance = { workspace = true }
 arrow = { workspace = true, features = ["ffi"] }
 arrow-schema.workspace = true
@@ -25,6 +25,3 @@ snafu.workspace = true
 lazy_static.workspace = true
 serde = { version = "^1" }
 serde_json = { version = "1" }
-
-[features]
-default = ["lancedb/default"]
```
Java Maven POMs:

```diff
@@ -8,7 +8,7 @@
   <parent>
     <groupId>com.lancedb</groupId>
     <artifactId>lancedb-parent</artifactId>
-    <version>0.22.1-beta.0</version>
+    <version>0.21.3-final.0</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
```

```diff
@@ -8,7 +8,7 @@
   <parent>
     <groupId>com.lancedb</groupId>
     <artifactId>lancedb-parent</artifactId>
-    <version>0.22.1-beta.0</version>
+    <version>0.21.3-final.0</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
```

```diff
@@ -6,7 +6,7 @@
 
   <groupId>com.lancedb</groupId>
   <artifactId>lancedb-parent</artifactId>
-  <version>0.22.1-beta.0</version>
+  <version>0.21.3-final.0</version>
   <packaging>pom</packaging>
   <name>${project.artifactId}</name>
   <description>LanceDB Java SDK Parent POM</description>
```
Node.js binding crate manifest (`lancedb-nodejs`):

```diff
@@ -1,7 +1,7 @@
 [package]
 name = "lancedb-nodejs"
 edition.workspace = true
-version = "0.22.1-beta.0"
+version = "0.21.3"
 license.workspace = true
 description.workspace = true
 repository.workspace = true
@@ -18,7 +18,7 @@ arrow-array.workspace = true
 arrow-schema.workspace = true
 env_logger.workspace = true
 futures.workspace = true
-lancedb = { path = "../rust/lancedb", default-features = false }
+lancedb = { path = "../rust/lancedb" }
 napi = { version = "2.16.8", default-features = false, features = [
     "napi9",
     "async"
@@ -36,6 +36,6 @@ aws-lc-rs = "=1.13.0"
 napi-build = "2.1"
 
 [features]
-default = ["remote", "lancedb/default"]
+default = ["remote"]
 fp16kernels = ["lancedb/fp16kernels"]
 remote = ["lancedb/remote"]
```
Node.js remote connection tests:

```diff
@@ -3,13 +3,7 @@
 
 import * as http from "http";
 import { RequestListener } from "http";
-import {
-  ClientConfig,
-  Connection,
-  ConnectionOptions,
-  TlsConfig,
-  connect,
-} from "../lancedb";
+import { Connection, ConnectionOptions, connect } from "../lancedb";
 
 async function withMockDatabase(
   listener: RequestListener,
@@ -154,88 +148,4 @@ describe("remote connection", () => {
       },
     );
   });
 
-  describe("TlsConfig", () => {
-    it("should create TlsConfig with all fields", () => {
-      const tlsConfig: TlsConfig = {
-        certFile: "/path/to/cert.pem",
-        keyFile: "/path/to/key.pem",
-        sslCaCert: "/path/to/ca.pem",
-        assertHostname: false,
-      };
-
-      expect(tlsConfig.certFile).toBe("/path/to/cert.pem");
-      expect(tlsConfig.keyFile).toBe("/path/to/key.pem");
-      expect(tlsConfig.sslCaCert).toBe("/path/to/ca.pem");
-      expect(tlsConfig.assertHostname).toBe(false);
-    });
-
-    it("should create TlsConfig with partial fields", () => {
-      const tlsConfig: TlsConfig = {
-        certFile: "/path/to/cert.pem",
-        keyFile: "/path/to/key.pem",
-      };
-
-      expect(tlsConfig.certFile).toBe("/path/to/cert.pem");
-      expect(tlsConfig.keyFile).toBe("/path/to/key.pem");
-      expect(tlsConfig.sslCaCert).toBeUndefined();
-      expect(tlsConfig.assertHostname).toBeUndefined();
-    });
-
-    it("should create ClientConfig with TlsConfig", () => {
-      const tlsConfig: TlsConfig = {
-        certFile: "/path/to/cert.pem",
-        keyFile: "/path/to/key.pem",
-        sslCaCert: "/path/to/ca.pem",
-        assertHostname: true,
-      };
-
-      const clientConfig: ClientConfig = {
-        userAgent: "test-agent",
-        tlsConfig: tlsConfig,
-      };
-
-      expect(clientConfig.userAgent).toBe("test-agent");
-      expect(clientConfig.tlsConfig).toBeDefined();
-      expect(clientConfig.tlsConfig?.certFile).toBe("/path/to/cert.pem");
-      expect(clientConfig.tlsConfig?.keyFile).toBe("/path/to/key.pem");
-      expect(clientConfig.tlsConfig?.sslCaCert).toBe("/path/to/ca.pem");
-      expect(clientConfig.tlsConfig?.assertHostname).toBe(true);
-    });
-
-    it("should handle empty TlsConfig", () => {
-      const tlsConfig: TlsConfig = {};
-
-      expect(tlsConfig.certFile).toBeUndefined();
-      expect(tlsConfig.keyFile).toBeUndefined();
-      expect(tlsConfig.sslCaCert).toBeUndefined();
-      expect(tlsConfig.assertHostname).toBeUndefined();
-    });
-
-    it("should accept TlsConfig in connection options", () => {
-      const tlsConfig: TlsConfig = {
-        certFile: "/path/to/cert.pem",
-        keyFile: "/path/to/key.pem",
-        sslCaCert: "/path/to/ca.pem",
-        assertHostname: false,
-      };
-
-      // Just verify that the ClientConfig accepts the TlsConfig
-      const clientConfig: ClientConfig = {
-        tlsConfig: tlsConfig,
-      };
-
-      const connectionOptions: ConnectionOptions = {
-        apiKey: "fake",
-        clientConfig: clientConfig,
-      };
-
-      // Verify the configuration structure is correct
-      expect(connectionOptions.clientConfig).toBeDefined();
-      expect(connectionOptions.clientConfig?.tlsConfig).toBeDefined();
-      expect(connectionOptions.clientConfig?.tlsConfig?.certFile).toBe(
-        "/path/to/cert.pem",
-      );
-    });
-  });
 });
```
Node.js index-creation tests:

```diff
@@ -857,40 +857,6 @@ describe("When creating an index", () => {
     expect(stats).toBeUndefined();
   });
 
-  test("should support name and train parameters", async () => {
-    // Test with custom name
-    await tbl.createIndex("vec", {
-      config: Index.ivfPq({ numPartitions: 4 }),
-      name: "my_custom_vector_index",
-    });
-
-    const indices = await tbl.listIndices();
-    expect(indices).toHaveLength(1);
-    expect(indices[0].name).toBe("my_custom_vector_index");
-
-    // Test scalar index with train=false
-    await tbl.createIndex("id", {
-      config: Index.btree(),
-      name: "btree_empty",
-      train: false,
-    });
-
-    const allIndices = await tbl.listIndices();
-    expect(allIndices).toHaveLength(2);
-    expect(allIndices.some((idx) => idx.name === "btree_empty")).toBe(true);
-
-    // Test with both name and train=true (use tags column)
-    await tbl.createIndex("tags", {
-      config: Index.labelList(),
-      name: "tags_trained",
-      train: true,
-    });
-
-    const finalIndices = await tbl.listIndices();
-    expect(finalIndices).toHaveLength(3);
-    expect(finalIndices.some((idx) => idx.name === "tags_trained")).toBe(true);
-  });
-
   test("create ivf_flat with binary vectors", async () => {
     const db = await connect(tmpDir.name);
     const binarySchema = new Schema([
```
TypeScript `Connection` / `LocalConnection` source:

```diff
@@ -159,33 +159,17 @@ export abstract class Connection {
    *
    * Tables will be returned in lexicographical order.
    * @param {Partial<TableNamesOptions>} options - options to control the
-   * paging / start point (backwards compatibility)
-   *
-   */
-  abstract tableNames(options?: Partial<TableNamesOptions>): Promise<string[]>;
-  /**
-   * List all the table names in this database.
-   *
-   * Tables will be returned in lexicographical order.
-   * @param {string[]} namespace - The namespace to list tables from (defaults to root namespace)
-   * @param {Partial<TableNamesOptions>} options - options to control the
    * paging / start point
    *
    */
-  abstract tableNames(
-    namespace?: string[],
-    options?: Partial<TableNamesOptions>,
-  ): Promise<string[]>;
+  abstract tableNames(options?: Partial<TableNamesOptions>): Promise<string[]>;
 
   /**
    * Open a table in the database.
    * @param {string} name - The name of the table
-   * @param {string[]} namespace - The namespace of the table (defaults to root namespace)
-   * @param {Partial<OpenTableOptions>} options - Additional options
    */
   abstract openTable(
     name: string,
-    namespace?: string[],
     options?: Partial<OpenTableOptions>,
   ): Promise<Table>;
@@ -194,7 +178,6 @@ export abstract class Connection {
    * @param {object} options - The options object.
    * @param {string} options.name - The name of the table.
    * @param {Data} options.data - Non-empty Array of Records to be inserted into the table
-   * @param {string[]} namespace - The namespace to create the table in (defaults to root namespace)
    *
    */
   abstract createTable(
@@ -202,72 +185,40 @@ export abstract class Connection {
       name: string;
      data: Data;
     } & Partial<CreateTableOptions>,
-    namespace?: string[],
   ): Promise<Table>;
   /**
    * Creates a new Table and initialize it with new data.
    * @param {string} name - The name of the table.
    * @param {Record<string, unknown>[] | TableLike} data - Non-empty Array of Records
    * to be inserted into the table
-   * @param {Partial<CreateTableOptions>} options - Additional options (backwards compatibility)
    */
   abstract createTable(
     name: string,
     data: Record<string, unknown>[] | TableLike,
     options?: Partial<CreateTableOptions>,
   ): Promise<Table>;
-  /**
-   * Creates a new Table and initialize it with new data.
-   * @param {string} name - The name of the table.
-   * @param {Record<string, unknown>[] | TableLike} data - Non-empty Array of Records
-   * to be inserted into the table
-   * @param {string[]} namespace - The namespace to create the table in (defaults to root namespace)
-   * @param {Partial<CreateTableOptions>} options - Additional options
-   */
-  abstract createTable(
-    name: string,
-    data: Record<string, unknown>[] | TableLike,
-    namespace?: string[],
-    options?: Partial<CreateTableOptions>,
-  ): Promise<Table>;
 
   /**
    * Creates a new empty Table
    * @param {string} name - The name of the table.
    * @param {Schema} schema - The schema of the table
-   * @param {Partial<CreateTableOptions>} options - Additional options (backwards compatibility)
    */
   abstract createEmptyTable(
     name: string,
     schema: import("./arrow").SchemaLike,
     options?: Partial<CreateTableOptions>,
   ): Promise<Table>;
-  /**
-   * Creates a new empty Table
-   * @param {string} name - The name of the table.
-   * @param {Schema} schema - The schema of the table
-   * @param {string[]} namespace - The namespace to create the table in (defaults to root namespace)
-   * @param {Partial<CreateTableOptions>} options - Additional options
-   */
-  abstract createEmptyTable(
-    name: string,
-    schema: import("./arrow").SchemaLike,
-    namespace?: string[],
-    options?: Partial<CreateTableOptions>,
-  ): Promise<Table>;
 
   /**
    * Drop an existing table.
    * @param {string} name The name of the table to drop.
-   * @param {string[]} namespace The namespace of the table (defaults to root namespace).
    */
-  abstract dropTable(name: string, namespace?: string[]): Promise<void>;
+  abstract dropTable(name: string): Promise<void>;
 
   /**
    * Drop all tables in the database.
-   * @param {string[]} namespace The namespace to drop tables from (defaults to root namespace).
    */
-  abstract dropAllTables(namespace?: string[]): Promise<void>;
+  abstract dropAllTables(): Promise<void>;
 }
 
 /** @hideconstructor */
@@ -292,39 +243,16 @@ export class LocalConnection extends Connection {
     return this.inner.display();
   }
 
-  async tableNames(
-    namespaceOrOptions?: string[] | Partial<TableNamesOptions>,
-    options?: Partial<TableNamesOptions>,
-  ): Promise<string[]> {
-    // Detect if first argument is namespace array or options object
-    let namespace: string[] | undefined;
-    let tableNamesOptions: Partial<TableNamesOptions> | undefined;
-
-    if (Array.isArray(namespaceOrOptions)) {
-      // First argument is namespace array
-      namespace = namespaceOrOptions;
-      tableNamesOptions = options;
-    } else {
-      // First argument is options object (backwards compatibility)
-      namespace = undefined;
-      tableNamesOptions = namespaceOrOptions;
-    }
-
-    return this.inner.tableNames(
-      namespace ?? [],
-      tableNamesOptions?.startAfter,
-      tableNamesOptions?.limit,
-    );
+  async tableNames(options?: Partial<TableNamesOptions>): Promise<string[]> {
+    return this.inner.tableNames(options?.startAfter, options?.limit);
   }
 
   async openTable(
     name: string,
-    namespace?: string[],
     options?: Partial<OpenTableOptions>,
   ): Promise<Table> {
     const innerTable = await this.inner.openTable(
       name,
-      namespace ?? [],
       cleanseStorageOptions(options?.storageOptions),
       options?.indexCacheSize,
     );
@@ -358,44 +286,14 @@ export class LocalConnection extends Connection {
     nameOrOptions:
       | string
      | ({ name: string; data: Data } & Partial<CreateTableOptions>),
-    dataOrNamespace?: Record<string, unknown>[] | TableLike | string[],
-    namespaceOrOptions?: string[] | Partial<CreateTableOptions>,
+    data?: Record<string, unknown>[] | TableLike,
     options?: Partial<CreateTableOptions>,
   ): Promise<Table> {
     if (typeof nameOrOptions !== "string" && "name" in nameOrOptions) {
-      // First overload: createTable(options, namespace?)
-      const { name, data, ...createOptions } = nameOrOptions;
-      const namespace = dataOrNamespace as string[] | undefined;
-      return this._createTableImpl(name, data, namespace, createOptions);
+      const { name, data, ...options } = nameOrOptions;
+      return this.createTable(name, data, options);
     }
 
-    // Second overload: createTable(name, data, namespace?, options?)
-    const name = nameOrOptions;
-    const data = dataOrNamespace as Record<string, unknown>[] | TableLike;
-
-    // Detect if third argument is namespace array or options object
-    let namespace: string[] | undefined;
-    let createOptions: Partial<CreateTableOptions> | undefined;
-
-    if (Array.isArray(namespaceOrOptions)) {
-      // Third argument is namespace array
-      namespace = namespaceOrOptions;
-      createOptions = options;
-    } else {
-      // Third argument is options object (backwards compatibility)
-      namespace = undefined;
-      createOptions = namespaceOrOptions;
-    }
-
-    return this._createTableImpl(name, data, namespace, createOptions);
-  }
-
-  private async _createTableImpl(
-    name: string,
-    data: Data,
-    namespace?: string[],
-    options?: Partial<CreateTableOptions>,
-  ): Promise<Table> {
     if (data === undefined) {
       throw new Error("data is required");
     }
@@ -404,10 +302,9 @@ export class LocalConnection extends Connection {
     const storageOptions = this.getStorageOptions(options);
 
     const innerTable = await this.inner.createTable(
-      name,
+      nameOrOptions,
       buf,
       mode,
-      namespace ?? [],
      storageOptions,
     );
@@ -417,55 +314,39 @@ export class LocalConnection extends Connection {
   async createEmptyTable(
     name: string,
     schema: import("./arrow").SchemaLike,
-    namespaceOrOptions?: string[] | Partial<CreateTableOptions>,
     options?: Partial<CreateTableOptions>,
   ): Promise<Table> {
-    // Detect if third argument is namespace array or options object
-    let namespace: string[] | undefined;
-    let createOptions: Partial<CreateTableOptions> | undefined;
-
-    if (Array.isArray(namespaceOrOptions)) {
-      // Third argument is namespace array
-      namespace = namespaceOrOptions;
-      createOptions = options;
-    } else {
-      // Third argument is options object (backwards compatibility)
-      namespace = undefined;
-      createOptions = namespaceOrOptions;
-    }
-
-    let mode: string = createOptions?.mode ?? "create";
-    const existOk = createOptions?.existOk ?? false;
+    let mode: string = options?.mode ?? "create";
+    const existOk = options?.existOk ?? false;
 
     if (mode === "create" && existOk) {
       mode = "exist_ok";
     }
     let metadata: Map<string, string> | undefined = undefined;
-    if (createOptions?.embeddingFunction !== undefined) {
-      const embeddingFunction = createOptions.embeddingFunction;
+    if (options?.embeddingFunction !== undefined) {
+      const embeddingFunction = options.embeddingFunction;
       const registry = getRegistry();
       metadata = registry.getTableMetadata([embeddingFunction]);
     }
 
-    const storageOptions = this.getStorageOptions(createOptions);
+    const storageOptions = this.getStorageOptions(options);
     const table = makeEmptyTable(schema, metadata);
     const buf = await fromTableToBuffer(table);
     const innerTable = await this.inner.createEmptyTable(
       name,
       buf,
       mode,
-      namespace ?? [],
       storageOptions,
     );
     return new LocalTable(innerTable);
   }
 
-  async dropTable(name: string, namespace?: string[]): Promise<void> {
-    return this.inner.dropTable(name, namespace ?? []);
+  async dropTable(name: string): Promise<void> {
+    return this.inner.dropTable(name);
   }
 
-  async dropAllTables(namespace?: string[]): Promise<void> {
-    return this.inner.dropAllTables(namespace ?? []);
+  async dropAllTables(): Promise<void> {
+    return this.inner.dropAllTables();
  }
 }
```
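The removed `LocalConnection` methods above emulate overloads by inspecting the first argument at runtime (`Array.isArray(namespaceOrOptions)`). Here is the same dispatch idiom reduced to a few lines of Python for illustration; the names are made up for the sketch, not lancedb API.

```python
from typing import Optional, Union


def table_names(
    namespace_or_options: Union[list, dict, None] = None,
    options: Optional[dict] = None,
):
    # Mirror of the Array.isArray check: a list means the caller passed a
    # namespace first; anything else is the old options-only call shape.
    if isinstance(namespace_or_options, list):
        return namespace_or_options, options or {}
    return [], namespace_or_options or {}  # backwards compatible


print(table_names(["prod"], {"limit": 10}))  # (['prod'], {'limit': 10})
print(table_names({"limit": 10}))            # ([], {'limit': 10})
```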
Node.js package exports:

```diff
@@ -21,7 +21,6 @@ export {
   ClientConfig,
   TimeoutConfig,
   RetryConfig,
-  TlsConfig,
   OptimizeStats,
   CompactionStats,
   RemovalStats,
```
`IndexOptions` interface (TypeScript):

```diff
@@ -700,27 +700,5 @@ export interface IndexOptions {
    */
   replace?: boolean;
 
-  /**
-   * Timeout in seconds to wait for index creation to complete.
-   *
-   * If not specified, the method will return immediately after starting the index creation.
-   */
   waitTimeoutSeconds?: number;
-
-  /**
-   * Optional custom name for the index.
-   *
-   * If not provided, a default name will be generated based on the column name.
-   */
-  name?: string;
-
-  /**
-   * Whether to train the index with existing data.
-   *
-   * If true (default), the index will be trained with existing data in the table.
-   * If false, the index will be created empty and populated as new data is added.
-   *
-   * Note: This option is only supported for scalar indices. Vector indices always train.
-   */
-  train?: boolean;
 }
```
`LocalTable.createIndex` call site (TypeScript):

```diff
@@ -662,8 +662,6 @@ export class LocalTable extends Table {
       column,
       options?.replace,
       options?.waitTimeoutSeconds,
-      options?.name,
-      options?.train,
     );
   }
 
```
Platform-specific npm packages (one package.json each):

```diff
@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-darwin-arm64",
-  "version": "0.22.1-beta.0",
+  "version": "0.21.3",
   "os": ["darwin"],
   "cpu": ["arm64"],
   "main": "lancedb.darwin-arm64.node",
```

```diff
@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-darwin-x64",
-  "version": "0.22.1-beta.0",
+  "version": "0.21.3",
   "os": ["darwin"],
   "cpu": ["x64"],
   "main": "lancedb.darwin-x64.node",
```

```diff
@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-linux-arm64-gnu",
-  "version": "0.22.1-beta.0",
+  "version": "0.21.3",
   "os": ["linux"],
   "cpu": ["arm64"],
   "main": "lancedb.linux-arm64-gnu.node",
```

```diff
@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-linux-arm64-musl",
-  "version": "0.22.1-beta.0",
+  "version": "0.21.3",
   "os": ["linux"],
   "cpu": ["arm64"],
   "main": "lancedb.linux-arm64-musl.node",
```

```diff
@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-linux-x64-gnu",
-  "version": "0.22.1-beta.0",
+  "version": "0.21.3",
   "os": ["linux"],
   "cpu": ["x64"],
   "main": "lancedb.linux-x64-gnu.node",
```

```diff
@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-linux-x64-musl",
-  "version": "0.22.1-beta.0",
+  "version": "0.21.3",
   "os": ["linux"],
   "cpu": ["x64"],
   "main": "lancedb.linux-x64-musl.node",
```

```diff
@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-win32-arm64-msvc",
-  "version": "0.22.1-beta.0",
+  "version": "0.21.3",
   "os": [
     "win32"
   ],
```

```diff
@@ -1,6 +1,6 @@
 {
   "name": "@lancedb/lancedb-win32-x64-msvc",
-  "version": "0.22.1-beta.0",
+  "version": "0.21.3",
   "os": ["win32"],
   "cpu": ["x64"],
   "main": "lancedb.win32-x64-msvc.node",
```
nodejs/package-lock.json (generated, 4 changed lines):
```diff
@@ -1,12 +1,12 @@
 {
   "name": "@lancedb/lancedb",
-  "version": "0.22.0",
+  "version": "0.21.2",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "@lancedb/lancedb",
-      "version": "0.22.0",
+      "version": "0.21.2",
       "cpu": [
         "x64",
         "arm64"
```
Main npm package.json:

```diff
@@ -11,7 +11,7 @@
     "ann"
   ],
   "private": false,
-  "version": "0.22.1-beta.0",
+  "version": "0.21.3",
   "main": "dist/index.js",
   "exports": {
     ".": "./dist/index.js",
```
napi `Connection` bindings (Rust):

```diff
@@ -100,12 +100,10 @@ impl Connection {
     #[napi(catch_unwind)]
     pub async fn table_names(
         &self,
-        namespace: Vec<String>,
         start_after: Option<String>,
         limit: Option<u32>,
     ) -> napi::Result<Vec<String>> {
         let mut op = self.get_inner()?.table_names();
-        op = op.namespace(namespace);
         if let Some(start_after) = start_after {
             op = op.start_after(start_after);
         }
@@ -127,7 +125,6 @@ impl Connection {
         name: String,
         buf: Buffer,
         mode: String,
-        namespace: Vec<String>,
         storage_options: Option<HashMap<String, String>>,
     ) -> napi::Result<Table> {
         let batches = ipc_file_to_batches(buf.to_vec())
@@ -135,8 +132,6 @@ impl Connection {
         let mode = Self::parse_create_mode_str(&mode)?;
         let mut builder = self.get_inner()?.create_table(&name, batches).mode(mode);
 
-        builder = builder.namespace(namespace);
-
         if let Some(storage_options) = storage_options {
             for (key, value) in storage_options {
                 builder = builder.storage_option(key, value);
@@ -152,7 +147,6 @@ impl Connection {
         name: String,
         schema_buf: Buffer,
         mode: String,
-        namespace: Vec<String>,
         storage_options: Option<HashMap<String, String>>,
     ) -> napi::Result<Table> {
         let schema = ipc_file_to_schema(schema_buf.to_vec()).map_err(|e| {
@@ -163,9 +157,6 @@ impl Connection {
             .get_inner()?
             .create_empty_table(&name, schema)
             .mode(mode);
 
-        builder = builder.namespace(namespace);
-
         if let Some(storage_options) = storage_options {
             for (key, value) in storage_options {
                 builder = builder.storage_option(key, value);
@@ -179,14 +170,10 @@ impl Connection {
     pub async fn open_table(
         &self,
         name: String,
-        namespace: Vec<String>,
         storage_options: Option<HashMap<String, String>>,
         index_cache_size: Option<u32>,
     ) -> napi::Result<Table> {
         let mut builder = self.get_inner()?.open_table(&name);
 
-        builder = builder.namespace(namespace);
-
         if let Some(storage_options) = storage_options {
             for (key, value) in storage_options {
                 builder = builder.storage_option(key, value);
@@ -201,18 +188,12 @@ impl Connection {
 
     /// Drop table with the name. Or raise an error if the table does not exist.
     #[napi(catch_unwind)]
-    pub async fn drop_table(&self, name: String, namespace: Vec<String>) -> napi::Result<()> {
-        self.get_inner()?
-            .drop_table(&name, &namespace)
-            .await
-            .default_error()
+    pub async fn drop_table(&self, name: String) -> napi::Result<()> {
+        self.get_inner()?.drop_table(&name).await.default_error()
     }
 
     #[napi(catch_unwind)]
-    pub async fn drop_all_tables(&self, namespace: Vec<String>) -> napi::Result<()> {
-        self.get_inner()?
-            .drop_all_tables(&namespace)
-            .await
-            .default_error()
+    pub async fn drop_all_tables(&self) -> napi::Result<()> {
+        self.get_inner()?.drop_all_tables().await.default_error()
     }
 }
```
`JsFullTextQuery` bindings (Rust):

```diff
@@ -480,7 +480,6 @@ impl JsFullTextQuery {
     }
 
     #[napi(factory)]
-    #[allow(clippy::use_self)] // NAPI doesn't allow Self here but clippy reports it
     pub fn boolean_query(queries: Vec<(String, &JsFullTextQuery)>) -> napi::Result<Self> {
         let mut sub_queries = Vec::with_capacity(queries.len());
         for (occur, q) in queries {
```
Remote client config bindings (Rust):

```diff
@@ -69,20 +69,6 @@ pub struct RetryConfig {
     pub statuses: Option<Vec<u16>>,
 }
 
-/// TLS/mTLS configuration for the remote HTTP client.
-#[napi(object)]
-#[derive(Debug, Default)]
-pub struct TlsConfig {
-    /// Path to the client certificate file (PEM format) for mTLS authentication.
-    pub cert_file: Option<String>,
-    /// Path to the client private key file (PEM format) for mTLS authentication.
-    pub key_file: Option<String>,
-    /// Path to the CA certificate file (PEM format) for server verification.
-    pub ssl_ca_cert: Option<String>,
-    /// Whether to verify the hostname in the server's certificate.
-    pub assert_hostname: Option<bool>,
-}
-
 #[napi(object)]
 #[derive(Debug, Default)]
 pub struct ClientConfig {
@@ -90,8 +76,6 @@ pub struct ClientConfig {
     pub retry_config: Option<RetryConfig>,
     pub timeout_config: Option<TimeoutConfig>,
     pub extra_headers: Option<HashMap<String, String>>,
-    pub id_delimiter: Option<String>,
-    pub tls_config: Option<TlsConfig>,
 }
 
 impl From<TimeoutConfig> for lancedb::remote::TimeoutConfig {
@@ -122,17 +106,6 @@ impl From<RetryConfig> for lancedb::remote::RetryConfig {
     }
 }
 
-impl From<TlsConfig> for lancedb::remote::TlsConfig {
-    fn from(config: TlsConfig) -> Self {
-        Self {
-            cert_file: config.cert_file,
-            key_file: config.key_file,
-            ssl_ca_cert: config.ssl_ca_cert,
-            assert_hostname: config.assert_hostname.unwrap_or(true),
-        }
-    }
-}
-
 impl From<ClientConfig> for lancedb::remote::ClientConfig {
     fn from(config: ClientConfig) -> Self {
         Self {
@@ -142,8 +115,6 @@ impl From<ClientConfig> for lancedb::remote::ClientConfig {
             retry_config: config.retry_config.map(Into::into).unwrap_or_default(),
             timeout_config: config.timeout_config.map(Into::into).unwrap_or_default(),
             extra_headers: config.extra_headers.unwrap_or_default(),
-            id_delimiter: config.id_delimiter,
-            tls_config: config.tls_config.map(Into::into),
         }
     }
 }
```
@@ -94,7 +94,7 @@ impl napi::bindgen_prelude::FromNapiValue for Session {
         env: napi::sys::napi_env,
         napi_val: napi::sys::napi_value,
     ) -> napi::Result<Self> {
-        let object: napi::bindgen_prelude::ClassInstance<Self> =
+        let object: napi::bindgen_prelude::ClassInstance<Session> =
             napi::bindgen_prelude::ClassInstance::from_napi_value(env, napi_val)?;
         let copy = object.clone();
         Ok(copy)
@@ -114,8 +114,6 @@ impl Table {
         column: String,
         replace: Option<bool>,
         wait_timeout_s: Option<i64>,
-        name: Option<String>,
-        train: Option<bool>,
     ) -> napi::Result<()> {
         let lancedb_index = if let Some(index) = index {
             index.consume()?
@@ -130,12 +128,6 @@ impl Table {
             builder =
                 builder.wait_timeout(std::time::Duration::from_secs(timeout.try_into().unwrap()));
         }
-        if let Some(name) = name {
-            builder = builder.name(name);
-        }
-        if let Some(train) = train {
-            builder = builder.train(train);
-        }
         builder.execute().await.default_error()
     }

@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.25.1-beta.0"
+current_version = "0.24.3"
 parse = """(?x)
     (?P<major>0|[1-9]\\d*)\\.
    (?P<minor>0|[1-9]\\d*)\\.
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb-python"
-version = "0.25.1-beta.0"
+version = "0.24.3"
 edition.workspace = true
 description = "Python bindings for LanceDB"
 license.workspace = true
@@ -33,6 +33,6 @@ pyo3-build-config = { version = "0.24", features = [
 ] }

 [features]
-default = ["remote", "lancedb/default"]
+default = ["remote"]
 fp16kernels = ["lancedb/fp16kernels"]
 remote = ["lancedb/remote"]
@@ -10,7 +10,6 @@ dependencies = [
     "pyarrow>=16",
     "pydantic>=1.10",
     "tqdm>=4.27.0",
-    "lance-namespace==0.0.6"
 ]
 description = "lancedb"
 authors = [{ name = "LanceDB Devs", email = "dev@lancedb.com" }]
@@ -19,7 +19,6 @@ from .remote.db import RemoteDBConnection
 from .schema import vector
 from .table import AsyncTable
 from ._lancedb import Session
-from .namespace import connect_namespace, LanceNamespaceDBConnection


 def connect(
@@ -222,7 +221,6 @@ async def connect_async(
 __all__ = [
     "connect",
     "connect_async",
-    "connect_namespace",
     "AsyncConnection",
     "AsyncTable",
     "URI",
@@ -230,7 +228,6 @@ __all__ = [
     "vector",
     "DBConnection",
     "LanceDBConnection",
-    "LanceNamespaceDBConnection",
     "RemoteDBConnection",
     "Session",
     "__version__",
@@ -21,28 +21,14 @@ class Session:

 class Connection(object):
     uri: str
-    async def is_open(self): ...
-    async def close(self): ...
-    async def list_namespaces(
-        self,
-        namespace: List[str],
-        page_token: Optional[str],
-        limit: Optional[int],
-    ) -> List[str]: ...
-    async def create_namespace(self, namespace: List[str]) -> None: ...
-    async def drop_namespace(self, namespace: List[str]) -> None: ...
     async def table_names(
-        self,
-        namespace: List[str],
-        start_after: Optional[str],
-        limit: Optional[int],
+        self, start_after: Optional[str], limit: Optional[int]
     ) -> list[str]: ...
     async def create_table(
         self,
         name: str,
         mode: str,
         data: pa.RecordBatchReader,
-        namespace: List[str] = [],
         storage_options: Optional[Dict[str, str]] = None,
     ) -> Table: ...
     async def create_empty_table(
@@ -50,25 +36,10 @@ class Connection(object):
         name: str,
         mode: str,
         schema: pa.Schema,
-        namespace: List[str] = [],
         storage_options: Optional[Dict[str, str]] = None,
     ) -> Table: ...
-    async def open_table(
-        self,
-        name: str,
-        namespace: List[str] = [],
-        storage_options: Optional[Dict[str, str]] = None,
-        index_cache_size: Optional[int] = None,
-    ) -> Table: ...
-    async def rename_table(
-        self,
-        cur_name: str,
-        new_name: str,
-        cur_namespace: List[str] = [],
-        new_namespace: List[str] = [],
-    ) -> None: ...
-    async def drop_table(self, name: str, namespace: List[str] = []) -> None: ...
-    async def drop_all_tables(self, namespace: List[str] = []) -> None: ...
+    async def rename_table(self, old_name: str, new_name: str) -> None: ...
+    async def drop_table(self, name: str) -> None: ...

 class Table:
     def name(self) -> str: ...
@@ -88,10 +59,6 @@ class Table:
         column: str,
         index: Union[IvfFlat, IvfPq, HnswPq, HnswSq, BTree, Bitmap, LabelList, FTS],
         replace: Optional[bool],
-        wait_timeout: Optional[object],
-        *,
-        name: Optional[str],
-        train: Optional[bool],
     ): ...
     async def list_versions(self) -> List[Dict[str, Any]]: ...
     async def version(self) -> int: ...
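For orientation, here is a minimal sketch of the namespace-scoped async API that exists only on the left-hand (pre-compare, beta) side of this diff; none of these calls are valid against v0.21.3, where the namespace methods are removed. The database path is a placeholder, and the sketch goes through the public AsyncConnection wrappers shown further down in db.py rather than the raw binding stubs above.

import asyncio

import lancedb


async def main():
    db = await lancedb.connect_async("data/sample-lancedb")  # placeholder path
    # Namespaces are addressed as a list of path segments; [] is the root.
    await db.create_namespace(["prod"])
    print(await db.list_namespaces(namespace=[]))    # children of the root
    print(await db.table_names(namespace=["prod"]))  # tables inside "prod"
    await db.drop_namespace(["prod"])


asyncio.run(main())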
@@ -43,70 +43,14 @@ if TYPE_CHECKING:
 class DBConnection(EnforceOverrides):
     """An active LanceDB connection interface."""

-    def list_namespaces(
-        self,
-        namespace: List[str] = [],
-        page_token: Optional[str] = None,
-        limit: int = 10,
-    ) -> Iterable[str]:
-        """List immediate child namespace names in the given namespace.
-
-        Parameters
-        ----------
-        namespace: List[str], default []
-            The parent namespace to list namespaces in.
-            Empty list represents root namespace.
-        page_token: str, optional
-            The token to use for pagination. If not present, start from the beginning.
-        limit: int, default 10
-            The size of the page to return.
-
-        Returns
-        -------
-        Iterable of str
-            List of immediate child namespace names
-        """
-        return []
-
-    def create_namespace(self, namespace: List[str]) -> None:
-        """Create a new namespace.
-
-        Parameters
-        ----------
-        namespace: List[str]
-            The namespace identifier to create.
-        """
-        raise NotImplementedError(
-            "Namespace operations are not supported for this connection type"
-        )
-
-    def drop_namespace(self, namespace: List[str]) -> None:
-        """Drop a namespace.
-
-        Parameters
-        ----------
-        namespace: List[str]
-            The namespace identifier to drop.
-        """
-        raise NotImplementedError(
-            "Namespace operations are not supported for this connection type"
-        )
-
     @abstractmethod
     def table_names(
-        self,
-        page_token: Optional[str] = None,
-        limit: int = 10,
-        *,
-        namespace: List[str] = [],
+        self, page_token: Optional[str] = None, limit: int = 10
     ) -> Iterable[str]:
         """List all tables in this database, in sorted order

         Parameters
         ----------
-        namespace: List[str], default []
-            The namespace to list tables in.
-            Empty list represents root namespace.
         page_token: str, optional
             The token to use for pagination. If not present, start from the beginning.
             Typically, this token is last table name from the previous page.
@@ -133,7 +77,6 @@ class DBConnection(EnforceOverrides):
         fill_value: float = 0.0,
         embedding_functions: Optional[List[EmbeddingFunctionConfig]] = None,
         *,
-        namespace: List[str] = [],
         storage_options: Optional[Dict[str, str]] = None,
         data_storage_version: Optional[str] = None,
         enable_v2_manifest_paths: Optional[bool] = None,
@@ -144,9 +87,6 @@
         ----------
         name: str
             The name of the table.
-        namespace: List[str], default []
-            The namespace to create the table in.
-            Empty list represents root namespace.
         data: The data to initialize the table, *optional*
             User must provide at least one of `data` or `schema`.
             Acceptable types are:
@@ -298,7 +238,6 @@ class DBConnection(EnforceOverrides):
         self,
         name: str,
         *,
-        namespace: List[str] = [],
         storage_options: Optional[Dict[str, str]] = None,
         index_cache_size: Optional[int] = None,
     ) -> Table:
@@ -308,9 +247,6 @@ class DBConnection(EnforceOverrides):
         ----------
         name: str
             The name of the table.
-        namespace: List[str], optional
-            The namespace to open the table from.
-            None or empty list represents root namespace.
         index_cache_size: int, default 256
             **Deprecated**: Use session-level cache configuration instead.
             Create a Session with custom cache sizes and pass it to lancedb.connect().
@@ -336,26 +272,17 @@ class DBConnection(EnforceOverrides):
         """
         raise NotImplementedError

-    def drop_table(self, name: str, namespace: List[str] = []):
+    def drop_table(self, name: str):
         """Drop a table from the database.

         Parameters
         ----------
         name: str
             The name of the table.
-        namespace: List[str], default []
-            The namespace to drop the table from.
-            Empty list represents root namespace.
         """
         raise NotImplementedError

-    def rename_table(
-        self,
-        cur_name: str,
-        new_name: str,
-        cur_namespace: List[str] = [],
-        new_namespace: List[str] = [],
-    ):
+    def rename_table(self, cur_name: str, new_name: str):
         """Rename a table in the database.

         Parameters
@@ -364,12 +291,6 @@ class DBConnection(EnforceOverrides):
             The current name of the table.
         new_name: str
             The new name of the table.
-        cur_namespace: List[str], optional
-            The namespace of the current table.
-            None or empty list represents root namespace.
-        new_namespace: List[str], optional
-            The namespace to move the table to.
-            If not specified, defaults to the same as cur_namespace.
         """
         raise NotImplementedError

@@ -380,15 +301,9 @@ class DBConnection(EnforceOverrides):
         """
         raise NotImplementedError

-    def drop_all_tables(self, namespace: List[str] = []):
+    def drop_all_tables(self):
         """
         Drop all tables from the database

-        Parameters
-        ----------
-        namespace: List[str], optional
-            The namespace to drop all tables from.
-            None or empty list represents root namespace.
         """
         raise NotImplementedError

@@ -489,87 +404,18 @@ class LanceDBConnection(DBConnection):
         conn = AsyncConnection(await lancedb_connect(self.uri))
         return await conn.table_names(start_after=start_after, limit=limit)

-    @override
-    def list_namespaces(
-        self,
-        namespace: List[str] = [],
-        page_token: Optional[str] = None,
-        limit: int = 10,
-    ) -> Iterable[str]:
-        """List immediate child namespace names in the given namespace.
-
-        Parameters
-        ----------
-        namespace: List[str], optional
-            The parent namespace to list namespaces in.
-            None or empty list represents root namespace.
-        page_token: str, optional
-            The token to use for pagination. If not present, start from the beginning.
-        limit: int, default 10
-            The size of the page to return.
-
-        Returns
-        -------
-        Iterable of str
-            List of immediate child namespace names
-        """
-        return LOOP.run(
-            self._conn.list_namespaces(
-                namespace=namespace, page_token=page_token, limit=limit
-            )
-        )
-
-    @override
-    def create_namespace(self, namespace: List[str]) -> None:
-        """Create a new namespace.
-
-        Parameters
-        ----------
-        namespace: List[str]
-            The namespace identifier to create.
-        """
-        LOOP.run(self._conn.create_namespace(namespace=namespace))
-
-    @override
-    def drop_namespace(self, namespace: List[str]) -> None:
-        """Drop a namespace.
-
-        Parameters
-        ----------
-        namespace: List[str]
-            The namespace identifier to drop.
-        """
-        return LOOP.run(self._conn.drop_namespace(namespace=namespace))
-
     @override
     def table_names(
-        self,
-        page_token: Optional[str] = None,
-        limit: int = 10,
-        *,
-        namespace: List[str] = [],
+        self, page_token: Optional[str] = None, limit: int = 10
     ) -> Iterable[str]:
         """Get the names of all tables in the database. The names are sorted.

-        Parameters
-        ----------
-        namespace: List[str], optional
-            The namespace to list tables in.
-        page_token: str, optional
-            The token to use for pagination.
-        limit: int, default 10
-            The maximum number of tables to return.
-
         Returns
         -------
         Iterator of str.
             A list of table names.
         """
-        return LOOP.run(
-            self._conn.table_names(
-                namespace=namespace, start_after=page_token, limit=limit
-            )
-        )
+        return LOOP.run(self._conn.table_names(start_after=page_token, limit=limit))

     def __len__(self) -> int:
         return len(self.table_names())
@@ -589,18 +435,12 @@ class LanceDBConnection(DBConnection):
         fill_value: float = 0.0,
         embedding_functions: Optional[List[EmbeddingFunctionConfig]] = None,
         *,
-        namespace: List[str] = [],
         storage_options: Optional[Dict[str, str]] = None,
         data_storage_version: Optional[str] = None,
         enable_v2_manifest_paths: Optional[bool] = None,
     ) -> LanceTable:
         """Create a table in the database.

-        Parameters
-        ----------
-        namespace: List[str], optional
-            The namespace to create the table in.
-
         See
         ---
         DBConnection.create_table
@@ -619,7 +459,6 @@ class LanceDBConnection(DBConnection):
             on_bad_vectors=on_bad_vectors,
             fill_value=fill_value,
             embedding_functions=embedding_functions,
-            namespace=namespace,
             storage_options=storage_options,
         )
         return tbl
@@ -629,7 +468,6 @@ class LanceDBConnection(DBConnection):
         self,
         name: str,
         *,
-        namespace: List[str] = [],
         storage_options: Optional[Dict[str, str]] = None,
         index_cache_size: Optional[int] = None,
     ) -> LanceTable:
@@ -639,8 +477,6 @@ class LanceDBConnection(DBConnection):
         ----------
         name: str
             The name of the table.
-        namespace: List[str], optional
-            The namespace to open the table from.

         Returns
         -------
@@ -660,68 +496,26 @@ class LanceDBConnection(DBConnection):
         return LanceTable.open(
             self,
             name,
-            namespace=namespace,
             storage_options=storage_options,
             index_cache_size=index_cache_size,
         )

     @override
-    def drop_table(
-        self,
-        name: str,
-        namespace: List[str] = [],
-        ignore_missing: bool = False,
-    ):
+    def drop_table(self, name: str, ignore_missing: bool = False):
         """Drop a table from the database.

         Parameters
         ----------
         name: str
             The name of the table.
-        namespace: List[str], optional
-            The namespace to drop the table from.
         ignore_missing: bool, default False
             If True, ignore if the table does not exist.
         """
-        LOOP.run(
-            self._conn.drop_table(
-                name, namespace=namespace, ignore_missing=ignore_missing
-            )
-        )
+        LOOP.run(self._conn.drop_table(name, ignore_missing=ignore_missing))

     @override
-    def drop_all_tables(self, namespace: List[str] = []):
-        LOOP.run(self._conn.drop_all_tables(namespace=namespace))
+    def drop_all_tables(self):
+        LOOP.run(self._conn.drop_all_tables())

-    @override
-    def rename_table(
-        self,
-        cur_name: str,
-        new_name: str,
-        cur_namespace: List[str] = [],
-        new_namespace: List[str] = [],
-    ):
-        """Rename a table in the database.
-
-        Parameters
-        ----------
-        cur_name: str
-            The current name of the table.
-        new_name: str
-            The new name of the table.
-        cur_namespace: List[str], optional
-            The namespace of the current table.
-        new_namespace: List[str], optional
-            The namespace to move the table to.
-        """
-        LOOP.run(
-            self._conn.rename_table(
-                cur_name,
-                new_name,
-                cur_namespace=cur_namespace,
-                new_namespace=new_namespace,
-            )
-        )
-
     @deprecation.deprecated(
         deprecated_in="0.15.1",
|||||||
def uri(self) -> str:
|
def uri(self) -> str:
|
||||||
return self._inner.uri
|
return self._inner.uri
|
||||||
|
|
||||||
async def list_namespaces(
|
|
||||||
self,
|
|
||||||
namespace: List[str] = [],
|
|
||||||
page_token: Optional[str] = None,
|
|
||||||
limit: int = 10,
|
|
||||||
) -> Iterable[str]:
|
|
||||||
"""List immediate child namespace names in the given namespace.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
namespace: List[str], optional
|
|
||||||
The parent namespace to list namespaces in.
|
|
||||||
None or empty list represents root namespace.
|
|
||||||
page_token: str, optional
|
|
||||||
The token to use for pagination. If not present, start from the beginning.
|
|
||||||
limit: int, default 10
|
|
||||||
The size of the page to return.
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
Iterable of str
|
|
||||||
List of immediate child namespace names (not full paths)
|
|
||||||
"""
|
|
||||||
return await self._inner.list_namespaces(
|
|
||||||
namespace=namespace, page_token=page_token, limit=limit
|
|
||||||
)
|
|
||||||
|
|
||||||
async def create_namespace(self, namespace: List[str]) -> None:
|
|
||||||
"""Create a new namespace.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
namespace: List[str]
|
|
||||||
The namespace identifier to create.
|
|
||||||
"""
|
|
||||||
await self._inner.create_namespace(namespace)
|
|
||||||
|
|
||||||
async def drop_namespace(self, namespace: List[str]) -> None:
|
|
||||||
"""Drop a namespace.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
namespace: List[str]
|
|
||||||
The namespace identifier to drop.
|
|
||||||
"""
|
|
||||||
await self._inner.drop_namespace(namespace)
|
|
||||||
|
|
||||||
async def table_names(
|
async def table_names(
|
||||||
self,
|
self, *, start_after: Optional[str] = None, limit: Optional[int] = None
|
||||||
*,
|
|
||||||
namespace: List[str] = [],
|
|
||||||
start_after: Optional[str] = None,
|
|
||||||
limit: Optional[int] = None,
|
|
||||||
) -> Iterable[str]:
|
) -> Iterable[str]:
|
||||||
"""List all tables in this database, in sorted order
|
"""List all tables in this database, in sorted order
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
namespace: List[str], optional
|
|
||||||
The namespace to list tables in.
|
|
||||||
None or empty list represents root namespace.
|
|
||||||
start_after: str, optional
|
start_after: str, optional
|
||||||
If present, only return names that come lexicographically after the supplied
|
If present, only return names that come lexicographically after the supplied
|
||||||
value.
|
value.
|
||||||
@@ -868,9 +608,7 @@ class AsyncConnection(object):
|
|||||||
-------
|
-------
|
||||||
Iterable of str
|
Iterable of str
|
||||||
"""
|
"""
|
||||||
return await self._inner.table_names(
|
return await self._inner.table_names(start_after=start_after, limit=limit)
|
||||||
namespace=namespace, start_after=start_after, limit=limit
|
|
||||||
)
|
|
||||||
|
|
||||||
async def create_table(
|
async def create_table(
|
||||||
self,
|
self,
|
||||||
@@ -883,7 +621,6 @@ class AsyncConnection(object):
|
|||||||
fill_value: Optional[float] = None,
|
fill_value: Optional[float] = None,
|
||||||
storage_options: Optional[Dict[str, str]] = None,
|
storage_options: Optional[Dict[str, str]] = None,
|
||||||
*,
|
*,
|
||||||
namespace: List[str] = [],
|
|
||||||
embedding_functions: Optional[List[EmbeddingFunctionConfig]] = None,
|
embedding_functions: Optional[List[EmbeddingFunctionConfig]] = None,
|
||||||
) -> AsyncTable:
|
) -> AsyncTable:
|
||||||
"""Create an [AsyncTable][lancedb.table.AsyncTable] in the database.
|
"""Create an [AsyncTable][lancedb.table.AsyncTable] in the database.
|
||||||
@@ -892,9 +629,6 @@ class AsyncConnection(object):
|
|||||||
----------
|
----------
|
||||||
name: str
|
name: str
|
||||||
The name of the table.
|
The name of the table.
|
||||||
namespace: List[str], default []
|
|
||||||
The namespace to create the table in.
|
|
||||||
Empty list represents root namespace.
|
|
||||||
data: The data to initialize the table, *optional*
|
data: The data to initialize the table, *optional*
|
||||||
User must provide at least one of `data` or `schema`.
|
User must provide at least one of `data` or `schema`.
|
||||||
Acceptable types are:
|
Acceptable types are:
|
||||||
@@ -1073,7 +807,6 @@ class AsyncConnection(object):
|
|||||||
name,
|
name,
|
||||||
mode,
|
mode,
|
||||||
schema,
|
schema,
|
||||||
namespace=namespace,
|
|
||||||
storage_options=storage_options,
|
storage_options=storage_options,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
@@ -1082,7 +815,6 @@ class AsyncConnection(object):
|
|||||||
name,
|
name,
|
||||||
mode,
|
mode,
|
||||||
data,
|
data,
|
||||||
namespace=namespace,
|
|
||||||
storage_options=storage_options,
|
storage_options=storage_options,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -1091,8 +823,6 @@ class AsyncConnection(object):
|
|||||||
async def open_table(
|
async def open_table(
|
||||||
self,
|
self,
|
||||||
name: str,
|
name: str,
|
||||||
*,
|
|
||||||
namespace: List[str] = [],
|
|
||||||
storage_options: Optional[Dict[str, str]] = None,
|
storage_options: Optional[Dict[str, str]] = None,
|
||||||
index_cache_size: Optional[int] = None,
|
index_cache_size: Optional[int] = None,
|
||||||
) -> AsyncTable:
|
) -> AsyncTable:
|
||||||
@@ -1102,9 +832,6 @@ class AsyncConnection(object):
|
|||||||
----------
|
----------
|
||||||
name: str
|
name: str
|
||||||
The name of the table.
|
The name of the table.
|
||||||
namespace: List[str], optional
|
|
||||||
The namespace to open the table from.
|
|
||||||
None or empty list represents root namespace.
|
|
||||||
storage_options: dict, optional
|
storage_options: dict, optional
|
||||||
Additional options for the storage backend. Options already set on the
|
Additional options for the storage backend. Options already set on the
|
||||||
connection will be inherited by the table, but can be overridden here.
|
connection will be inherited by the table, but can be overridden here.
|
||||||
@@ -1128,77 +855,42 @@ class AsyncConnection(object):
|
|||||||
-------
|
-------
|
||||||
A LanceTable object representing the table.
|
A LanceTable object representing the table.
|
||||||
"""
|
"""
|
||||||
table = await self._inner.open_table(
|
table = await self._inner.open_table(name, storage_options, index_cache_size)
|
||||||
name,
|
|
||||||
namespace=namespace,
|
|
||||||
storage_options=storage_options,
|
|
||||||
index_cache_size=index_cache_size,
|
|
||||||
)
|
|
||||||
return AsyncTable(table)
|
return AsyncTable(table)
|
||||||
|
|
||||||
async def rename_table(
|
async def rename_table(self, old_name: str, new_name: str):
|
||||||
self,
|
|
||||||
cur_name: str,
|
|
||||||
new_name: str,
|
|
||||||
cur_namespace: List[str] = [],
|
|
||||||
new_namespace: List[str] = [],
|
|
||||||
):
|
|
||||||
"""Rename a table in the database.
|
"""Rename a table in the database.
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
cur_name: str
|
old_name: str
|
||||||
The current name of the table.
|
The current name of the table.
|
||||||
new_name: str
|
new_name: str
|
||||||
The new name of the table.
|
The new name of the table.
|
||||||
cur_namespace: List[str], optional
|
|
||||||
The namespace of the current table.
|
|
||||||
None or empty list represents root namespace.
|
|
||||||
new_namespace: List[str], optional
|
|
||||||
The namespace to move the table to.
|
|
||||||
If not specified, defaults to the same as cur_namespace.
|
|
||||||
"""
|
"""
|
||||||
await self._inner.rename_table(
|
await self._inner.rename_table(old_name, new_name)
|
||||||
cur_name, new_name, cur_namespace=cur_namespace, new_namespace=new_namespace
|
|
||||||
)
|
|
||||||
|
|
||||||
async def drop_table(
|
async def drop_table(self, name: str, *, ignore_missing: bool = False):
|
||||||
self,
|
|
||||||
name: str,
|
|
||||||
*,
|
|
||||||
namespace: List[str] = [],
|
|
||||||
ignore_missing: bool = False,
|
|
||||||
):
|
|
||||||
"""Drop a table from the database.
|
"""Drop a table from the database.
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
name: str
|
name: str
|
||||||
The name of the table.
|
The name of the table.
|
||||||
namespace: List[str], default []
|
|
||||||
The namespace to drop the table from.
|
|
||||||
Empty list represents root namespace.
|
|
||||||
ignore_missing: bool, default False
|
ignore_missing: bool, default False
|
||||||
If True, ignore if the table does not exist.
|
If True, ignore if the table does not exist.
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
await self._inner.drop_table(name, namespace=namespace)
|
await self._inner.drop_table(name)
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
if not ignore_missing:
|
if not ignore_missing:
|
||||||
raise e
|
raise e
|
||||||
if f"Table '{name}' was not found" not in str(e):
|
if f"Table '{name}' was not found" not in str(e):
|
||||||
raise e
|
raise e
|
||||||
|
|
||||||
async def drop_all_tables(self, namespace: List[str] = []):
|
async def drop_all_tables(self):
|
||||||
"""Drop all tables from the database.
|
"""Drop all tables from the database."""
|
||||||
|
await self._inner.drop_all_tables()
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
namespace: List[str], optional
|
|
||||||
The namespace to drop all tables from.
|
|
||||||
None or empty list represents root namespace.
|
|
||||||
"""
|
|
||||||
await self._inner.drop_all_tables(namespace=namespace)
|
|
||||||
|
|
||||||
@deprecation.deprecated(
|
@deprecation.deprecated(
|
||||||
deprecated_in="0.15.1",
|
deprecated_in="0.15.1",
|
||||||
|
|||||||
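The async side mirrors that: per the hunk above, drop_table(..., ignore_missing=True) works by catching ValueError and matching the exact message "Table '<name>' was not found". A hedged usage sketch of the kept (v0.21.3) surface, with placeholder path and data:

import asyncio

import lancedb


async def main():
    db = await lancedb.connect_async("data/sample-lancedb")
    await db.create_table("demo", data=[{"vector": [1.0, 2.0], "id": 1}])
    print(await db.table_names(limit=10))
    # Swallows only the "was not found" error, per the except clause above.
    await db.drop_table("missing", ignore_missing=True)
    await db.drop_all_tables()


asyncio.run(main())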
@@ -1,401 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright The LanceDB Authors
-
-"""
-LanceDB Namespace integration module.
-
-This module provides integration with lance_namespace for managing tables
-through a namespace abstraction.
-"""
-
-from __future__ import annotations
-
-from typing import Dict, Iterable, List, Optional, Union
-import os
-
-from lancedb.db import DBConnection
-from lancedb.table import LanceTable, Table
-from lancedb.util import validate_table_name
-from lancedb.common import validate_schema
-from lancedb.table import sanitize_create_table
-from overrides import override
-
-from lance_namespace import LanceNamespace, connect as namespace_connect
-from lance_namespace_urllib3_client.models import (
-    ListTablesRequest,
-    DescribeTableRequest,
-    CreateTableRequest,
-    DropTableRequest,
-    ListNamespacesRequest,
-    CreateNamespaceRequest,
-    DropNamespaceRequest,
-    JsonArrowSchema,
-    JsonArrowField,
-    JsonArrowDataType,
-)
-
-import pyarrow as pa
-from datetime import timedelta
-from lancedb.pydantic import LanceModel
-from lancedb.common import DATA
-from lancedb.embeddings import EmbeddingFunctionConfig
-from ._lancedb import Session
-
-
-def _convert_pyarrow_type_to_json(arrow_type: pa.DataType) -> JsonArrowDataType:
-    """Convert PyArrow DataType to JsonArrowDataType."""
-    if pa.types.is_null(arrow_type):
-        type_name = "null"
-    elif pa.types.is_boolean(arrow_type):
-        type_name = "bool"
-    elif pa.types.is_int8(arrow_type):
-        type_name = "int8"
-    elif pa.types.is_uint8(arrow_type):
-        type_name = "uint8"
-    elif pa.types.is_int16(arrow_type):
-        type_name = "int16"
-    elif pa.types.is_uint16(arrow_type):
-        type_name = "uint16"
-    elif pa.types.is_int32(arrow_type):
-        type_name = "int32"
-    elif pa.types.is_uint32(arrow_type):
-        type_name = "uint32"
-    elif pa.types.is_int64(arrow_type):
-        type_name = "int64"
-    elif pa.types.is_uint64(arrow_type):
-        type_name = "uint64"
-    elif pa.types.is_float32(arrow_type):
-        type_name = "float32"
-    elif pa.types.is_float64(arrow_type):
-        type_name = "float64"
-    elif pa.types.is_string(arrow_type):
-        type_name = "utf8"
-    elif pa.types.is_binary(arrow_type):
-        type_name = "binary"
-    elif pa.types.is_list(arrow_type):
-        # For list types, we need more complex handling
-        type_name = "list"
-    elif pa.types.is_fixed_size_list(arrow_type):
-        type_name = "fixed_size_list"
-    else:
-        # Default to string representation for unsupported types
-        type_name = str(arrow_type)
-
-    return JsonArrowDataType(type=type_name)
-
-
-def _convert_pyarrow_schema_to_json(schema: pa.Schema) -> JsonArrowSchema:
-    """Convert PyArrow Schema to JsonArrowSchema."""
-    fields = []
-    for field in schema:
-        json_field = JsonArrowField(
-            name=field.name,
-            type=_convert_pyarrow_type_to_json(field.type),
-            nullable=field.nullable,
-            metadata=field.metadata,
-        )
-        fields.append(json_field)
-
-    return JsonArrowSchema(fields=fields, metadata=schema.metadata)
-
-
-class LanceNamespaceDBConnection(DBConnection):
-    """
-    A LanceDB connection that uses a namespace for table management.
-
-    This connection delegates table URI resolution to a lance_namespace instance,
-    while using the standard LanceTable for actual table operations.
-    """
-
-    def __init__(
-        self,
-        namespace: LanceNamespace,
-        *,
-        read_consistency_interval: Optional[timedelta] = None,
-        storage_options: Optional[Dict[str, str]] = None,
-        session: Optional[Session] = None,
-    ):
-        """
-        Initialize a namespace-based LanceDB connection.
-
-        Parameters
-        ----------
-        namespace : LanceNamespace
-            The namespace instance to use for table management
-        read_consistency_interval : Optional[timedelta]
-            The interval at which to check for updates to the table from other
-            processes. If None, then consistency is not checked.
-        storage_options : Optional[Dict[str, str]]
-            Additional options for the storage backend
-        session : Optional[Session]
-            A session to use for this connection
-        """
-        self._ns = namespace
-        self.read_consistency_interval = read_consistency_interval
-        self.storage_options = storage_options or {}
-        self.session = session
-
-    @override
-    def table_names(
-        self,
-        page_token: Optional[str] = None,
-        limit: int = 10,
-        *,
-        namespace: List[str] = [],
-    ) -> Iterable[str]:
-        request = ListTablesRequest(id=namespace, page_token=page_token, limit=limit)
-        response = self._ns.list_tables(request)
-        return response.tables if response.tables else []
-
-    @override
-    def create_table(
-        self,
-        name: str,
-        data: Optional[DATA] = None,
-        schema: Optional[Union[pa.Schema, LanceModel]] = None,
-        mode: str = "create",
-        exist_ok: bool = False,
-        on_bad_vectors: str = "error",
-        fill_value: float = 0.0,
-        embedding_functions: Optional[List[EmbeddingFunctionConfig]] = None,
-        *,
-        namespace: List[str] = [],
-        storage_options: Optional[Dict[str, str]] = None,
-        data_storage_version: Optional[str] = None,
-        enable_v2_manifest_paths: Optional[bool] = None,
-    ) -> Table:
-        if mode.lower() not in ["create", "overwrite"]:
-            raise ValueError("mode must be either 'create' or 'overwrite'")
-        validate_table_name(name)
-
-        # TODO: support passing data
-        if data is not None:
-            raise ValueError(
-                "create_table currently only supports creating empty tables (data=None)"
-            )
-
-        # Prepare schema
-        metadata = None
-        if embedding_functions is not None:
-            from lancedb.embeddings.registry import EmbeddingFunctionRegistry
-
-            registry = EmbeddingFunctionRegistry.get_instance()
-            metadata = registry.get_table_metadata(embedding_functions)
-
-        data, schema = sanitize_create_table(
-            data, schema, metadata, on_bad_vectors, fill_value
-        )
-        validate_schema(schema)
-
-        # Convert PyArrow schema to JsonArrowSchema
-        json_schema = _convert_pyarrow_schema_to_json(schema)
-
-        # Create table request with namespace
-        table_id = namespace + [name]
-        request = CreateTableRequest(id=table_id, var_schema=json_schema)
-
-        # Create empty Arrow IPC stream bytes
-        import pyarrow.ipc as ipc
-        import io
-
-        empty_table = pa.Table.from_arrays(
-            [pa.array([], type=field.type) for field in schema], schema=schema
-        )
-        buffer = io.BytesIO()
-        with ipc.new_stream(buffer, schema) as writer:
-            writer.write_table(empty_table)
-        request_data = buffer.getvalue()
-
-        self._ns.create_table(request, request_data)
-        return self.open_table(
-            name, namespace=namespace, storage_options=storage_options
-        )
-
-    @override
-    def open_table(
-        self,
-        name: str,
-        *,
-        namespace: List[str] = [],
-        storage_options: Optional[Dict[str, str]] = None,
-        index_cache_size: Optional[int] = None,
-    ) -> Table:
-        table_id = namespace + [name]
-        request = DescribeTableRequest(id=table_id)
-        response = self._ns.describe_table(request)
-
-        merged_storage_options = dict()
-        if storage_options:
-            merged_storage_options.update(storage_options)
-        if response.storage_options:
-            merged_storage_options.update(response.storage_options)
-
-        return self._lance_table_from_uri(
-            response.location,
-            storage_options=merged_storage_options,
-            index_cache_size=index_cache_size,
-        )
-
-    @override
-    def drop_table(self, name: str, namespace: List[str] = []):
-        # Use namespace drop_table directly
-        table_id = namespace + [name]
-        request = DropTableRequest(id=table_id)
-        self._ns.drop_table(request)
-
-    @override
-    def rename_table(
-        self,
-        cur_name: str,
-        new_name: str,
-        cur_namespace: List[str] = [],
-        new_namespace: List[str] = [],
-    ):
-        raise NotImplementedError(
-            "rename_table is not supported for namespace connections"
-        )
-
-    @override
-    def drop_database(self):
-        raise NotImplementedError(
-            "drop_database is deprecated, use drop_all_tables instead"
-        )
-
-    @override
-    def drop_all_tables(self, namespace: List[str] = []):
-        for table_name in self.table_names(namespace=namespace):
-            self.drop_table(table_name, namespace=namespace)
-
-    @override
-    def list_namespaces(
-        self,
-        namespace: List[str] = [],
-        page_token: Optional[str] = None,
-        limit: int = 10,
-    ) -> Iterable[str]:
-        """
-        List child namespaces under the given namespace.
-
-        Parameters
-        ----------
-        namespace : Optional[List[str]]
-            The parent namespace to list children from.
-            If None, lists root-level namespaces.
-        page_token : Optional[str]
-            Pagination token for listing results.
-        limit : int
-            Maximum number of namespaces to return.
-
-        Returns
-        -------
-        Iterable[str]
-            Names of child namespaces.
-        """
-        request = ListNamespacesRequest(
-            id=namespace, page_token=page_token, limit=limit
-        )
-        response = self._ns.list_namespaces(request)
-        return response.namespaces if response.namespaces else []
-
-    @override
-    def create_namespace(self, namespace: List[str]) -> None:
-        """
-        Create a new namespace.
-
-        Parameters
-        ----------
-        namespace : List[str]
-            The namespace path to create.
-        """
-        request = CreateNamespaceRequest(id=namespace)
-        self._ns.create_namespace(request)
-
-    @override
-    def drop_namespace(self, namespace: List[str]) -> None:
-        """
-        Drop a namespace.
-
-        Parameters
-        ----------
-        namespace : List[str]
-            The namespace path to drop.
-        """
-        request = DropNamespaceRequest(id=namespace)
-        self._ns.drop_namespace(request)
-
-    def _lance_table_from_uri(
-        self,
-        table_uri: str,
-        *,
-        storage_options: Optional[Dict[str, str]] = None,
-        index_cache_size: Optional[int] = None,
-    ) -> LanceTable:
-        # Extract the base path and table name from the URI
-        if table_uri.endswith(".lance"):
-            base_path = os.path.dirname(table_uri)
-            table_name = os.path.basename(table_uri)[:-6]  # Remove .lance
-        else:
-            raise ValueError(f"Invalid table URI: {table_uri}")
-
-        from lancedb.db import LanceDBConnection
-
-        temp_conn = LanceDBConnection(
-            base_path,
-            read_consistency_interval=self.read_consistency_interval,
-            storage_options={**self.storage_options, **(storage_options or {})},
-            session=self.session,
-        )
-
-        # Open the table using the temporary connection
-        return LanceTable.open(
-            temp_conn,
-            table_name,
-            storage_options=storage_options,
-            index_cache_size=index_cache_size,
-        )
-
-
-def connect_namespace(
-    impl: str,
-    properties: Dict[str, str],
-    *,
-    read_consistency_interval: Optional[timedelta] = None,
-    storage_options: Optional[Dict[str, str]] = None,
-    session: Optional[Session] = None,
-) -> LanceNamespaceDBConnection:
-    """
-    Connect to a LanceDB database through a namespace.
-
-    Parameters
-    ----------
-    impl : str
-        The namespace implementation to use. For examples:
-        - "dir" for DirectoryNamespace
-        - "rest" for REST-based namespace
-        - Full module path for custom implementations
-    properties : Dict[str, str]
-        Configuration properties for the namespace implementation.
-        Different namespace implemenation has different config properties.
-        For example, use DirectoryNamespace with {"root": "/path/to/directory"}
-    read_consistency_interval : Optional[timedelta]
-        The interval at which to check for updates to the table from other
-        processes. If None, then consistency is not checked.
-    storage_options : Optional[Dict[str, str]]
-        Additional options for the storage backend
-    session : Optional[Session]
-        A session to use for this connection
-
-    Returns
-    -------
-    LanceNamespaceDBConnection
-        A namespace-based connection to LanceDB
-    """
-    namespace = namespace_connect(impl, properties)
-
-    # Return the namespace-based connection
-    return LanceNamespaceDBConnection(
-        namespace,
-        read_consistency_interval=read_consistency_interval,
-        storage_options=storage_options,
-        session=session,
-    )
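For context on what the deleted module offered, here is a sketch assembled from its own docstrings: connect_namespace("dir", {"root": ...}) wraps a DirectoryNamespace, and create_table on this connection only accepts empty tables (data=None). This is valid only on the beta side of this compare, and the root path is a placeholder.

import pyarrow as pa

from lancedb import connect_namespace

db = connect_namespace("dir", {"root": "/path/to/directory"})  # placeholder root
db.create_namespace(["team_a"])

schema = pa.schema([pa.field("id", pa.int64()), pa.field("text", pa.utf8())])
# Only empty tables are supported here; pass a schema, not data.
tbl = db.create_table("docs", schema=schema, namespace=["team_a"])
print(list(db.table_names(namespace=["team_a"])))
db.drop_table("docs", namespace=["team_a"])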
@@ -943,22 +943,20 @@ class LanceQueryBuilder(ABC):
         >>> query = [100, 100]
         >>> plan = table.search(query).analyze_plan()
         >>> print(plan) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
-        AnalyzeExec verbose=true, metrics=[], cumulative_cpu=...
-          TracedExec, metrics=[], cumulative_cpu=...
-            ProjectionExec: expr=[...], metrics=[...], cumulative_cpu=...
-              GlobalLimitExec: skip=0, fetch=10, metrics=[...], cumulative_cpu=...
+        AnalyzeExec verbose=true, metrics=[]
+          TracedExec, metrics=[]
+            ProjectionExec: expr=[...], metrics=[...]
+              GlobalLimitExec: skip=0, fetch=10, metrics=[...]
                 FilterExec: _distance@2 IS NOT NULL,
-                  metrics=[output_rows=..., elapsed_compute=...], cumulative_cpu=...
+                  metrics=[output_rows=..., elapsed_compute=...]
                   SortExec: TopK(fetch=10), expr=[...],
                     preserve_partitioning=[...],
-                    metrics=[output_rows=..., elapsed_compute=..., row_replacements=...],
-                    cumulative_cpu=...
+                    metrics=[output_rows=..., elapsed_compute=..., row_replacements=...]
                     KNNVectorDistance: metric=l2,
-                      metrics=[output_rows=..., elapsed_compute=..., output_batches=...],
-                      cumulative_cpu=...
+                      metrics=[output_rows=..., elapsed_compute=..., output_batches=...]
                       LanceRead: uri=..., projection=[vector], ...
                         metrics=[output_rows=..., elapsed_compute=...,
-                        bytes_read=..., iops=..., requests=...], cumulative_cpu=...
+                        bytes_read=..., iops=..., requests=...]

         Returns
         -------
@@ -8,7 +8,7 @@ from typing import List, Optional

 from lancedb import __version__

-__all__ = ["TimeoutConfig", "RetryConfig", "TlsConfig", "ClientConfig"]
+__all__ = ["TimeoutConfig", "RetryConfig", "ClientConfig"]


 @dataclass
@@ -112,42 +112,15 @@ class RetryConfig:
     statuses: Optional[List[int]] = None


-@dataclass
-class TlsConfig:
-    """TLS/mTLS configuration for the remote HTTP client.
-
-    Attributes
-    ----------
-    cert_file: Optional[str]
-        Path to the client certificate file (PEM format) for mTLS authentication.
-    key_file: Optional[str]
-        Path to the client private key file (PEM format) for mTLS authentication.
-    ssl_ca_cert: Optional[str]
-        Path to the CA certificate file (PEM format) for server verification.
-    assert_hostname: bool
-        Whether to verify the hostname in the server's certificate. Default is True.
-        Set to False to disable hostname verification (use with caution).
-    """
-
-    cert_file: Optional[str] = None
-    key_file: Optional[str] = None
-    ssl_ca_cert: Optional[str] = None
-    assert_hostname: bool = True
-
-
 @dataclass
 class ClientConfig:
     user_agent: str = f"LanceDB-Python-Client/{__version__}"
     retry_config: RetryConfig = field(default_factory=RetryConfig)
     timeout_config: Optional[TimeoutConfig] = field(default_factory=TimeoutConfig)
     extra_headers: Optional[dict] = None
-    id_delimiter: Optional[str] = None
-    tls_config: Optional[TlsConfig] = None

     def __post_init__(self):
         if isinstance(self.retry_config, dict):
             self.retry_config = RetryConfig(**self.retry_config)
         if isinstance(self.timeout_config, dict):
             self.timeout_config = TimeoutConfig(**self.timeout_config)
-        if isinstance(self.tls_config, dict):
-            self.tls_config = TlsConfig(**self.tls_config)
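To make the removed TlsConfig concrete, a sketch of wiring it into a remote connection on the beta side. The dataclass fields come straight from the hunk above; passing the result as client_config= to lancedb.connect, the db:// URI, the import path, and the file names are all assumptions based on the existing remote client rather than documented behavior.

import lancedb
from lancedb.remote import ClientConfig, TlsConfig  # import path assumed

config = ClientConfig(
    tls_config=TlsConfig(
        cert_file="client.pem",   # client certificate (PEM) for mTLS
        key_file="client.key",    # matching private key (PEM)
        ssl_ca_cert="ca.pem",     # CA bundle used to verify the server
        assert_hostname=True,     # keep hostname verification enabled
    ),
)

db = lancedb.connect("db://my-project", api_key="...", client_config=config)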
@@ -96,73 +96,14 @@ class RemoteDBConnection(DBConnection):
|
|||||||
def __repr__(self) -> str:
|
def __repr__(self) -> str:
|
||||||
return f"RemoteConnect(name={self.db_name})"
|
return f"RemoteConnect(name={self.db_name})"
|
||||||
|
|
||||||
@override
|
|
||||||
def list_namespaces(
|
|
||||||
self,
|
|
||||||
namespace: List[str] = [],
|
|
||||||
page_token: Optional[str] = None,
|
|
||||||
limit: int = 10,
|
|
||||||
) -> Iterable[str]:
|
|
||||||
"""List immediate child namespace names in the given namespace.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
namespace: List[str], optional
|
|
||||||
The parent namespace to list namespaces in.
|
|
||||||
None or empty list represents root namespace.
|
|
||||||
page_token: str, optional
|
|
||||||
The token to use for pagination. If not present, start from the beginning.
|
|
||||||
limit: int, default 10
|
|
||||||
The size of the page to return.
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
Iterable of str
|
|
||||||
List of immediate child namespace names
|
|
||||||
"""
|
|
||||||
return LOOP.run(
|
|
||||||
self._conn.list_namespaces(
|
|
||||||
namespace=namespace, page_token=page_token, limit=limit
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
@override
|
|
||||||
def create_namespace(self, namespace: List[str]) -> None:
|
|
||||||
"""Create a new namespace.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
namespace: List[str]
|
|
||||||
The namespace identifier to create.
|
|
||||||
"""
|
|
||||||
LOOP.run(self._conn.create_namespace(namespace=namespace))
|
|
||||||
|
|
||||||
@override
|
|
||||||
def drop_namespace(self, namespace: List[str]) -> None:
|
|
||||||
"""Drop a namespace.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
namespace: List[str]
|
|
||||||
The namespace identifier to drop.
|
|
||||||
"""
|
|
||||||
return LOOP.run(self._conn.drop_namespace(namespace=namespace))
|
|
||||||
|
|
||||||
     @override
     def table_names(
-        self,
-        page_token: Optional[str] = None,
-        limit: int = 10,
-        *,
-        namespace: List[str] = [],
+        self, page_token: Optional[str] = None, limit: int = 10
     ) -> Iterable[str]:
         """List the names of all tables in the database.

         Parameters
         ----------
-        namespace: List[str], default []
-            The namespace to list tables in.
-            Empty list represents root namespace.
         page_token: str
             The last token to start the new page.
         limit: int, default 10
@@ -172,18 +113,13 @@ class RemoteDBConnection(DBConnection):
         -------
         An iterator of table names.
         """
-        return LOOP.run(
-            self._conn.table_names(
-                namespace=namespace, start_after=page_token, limit=limit
-            )
-        )
+        return LOOP.run(self._conn.table_names(start_after=page_token, limit=limit))

     @override
     def open_table(
         self,
         name: str,
         *,
-        namespace: List[str] = [],
         storage_options: Optional[Dict[str, str]] = None,
         index_cache_size: Optional[int] = None,
     ) -> Table:
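
The `table_names` change above is behavioral as well as cosmetic: 0.22.1-beta appends a keyword-only `namespace` parameter after the pagination arguments, while 0.21.3 keeps only `page_token` and `limit`. A sketch of both calling conventions, again assuming an open connection `db`:

```python
# Pagination is the same in both versions: `page_token` is the last name
# already seen and `limit` caps the page size.
first_page = list(db.table_names(limit=2))
next_page = list(db.table_names(page_token=first_page[-1], limit=2))

# Beta-only (removed by this diff): scope the listing to a child
# namespace via the keyword-only parameter.
# scoped = list(db.table_names(namespace=["analytics"]))
```
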
@@ -193,9 +129,6 @@ class RemoteDBConnection(DBConnection):
         ----------
         name: str
             The name of the table.
-        namespace: List[str], optional
-            The namespace to open the table from.
-            None or empty list represents root namespace.

         Returns
         -------
@@ -209,7 +142,7 @@ class RemoteDBConnection(DBConnection):
                 " (there is no local cache to configure)"
             )

-        table = LOOP.run(self._conn.open_table(name, namespace=namespace))
+        table = LOOP.run(self._conn.open_table(name))
         return RemoteTable(table, self.db_name)

     @override
@@ -222,8 +155,6 @@ class RemoteDBConnection(DBConnection):
         fill_value: float = 0.0,
         mode: Optional[str] = None,
         embedding_functions: Optional[List[EmbeddingFunctionConfig]] = None,
-        *,
-        namespace: List[str] = [],
     ) -> Table:
         """Create a [Table][lancedb.table.Table] in the database.

@@ -231,9 +162,6 @@ class RemoteDBConnection(DBConnection):
         ----------
         name: str
             The name of the table.
-        namespace: List[str], optional
-            The namespace to create the table in.
-            None or empty list represents root namespace.
         data: The data to initialize the table, *optional*
             User must provide at least one of `data` or `schema`.
             Acceptable types are:
@@ -334,7 +262,6 @@ class RemoteDBConnection(DBConnection):
             self._conn.create_table(
                 name,
                 data,
-                namespace=namespace,
                 mode=mode,
                 schema=schema,
                 on_bad_vectors=on_bad_vectors,
@@ -344,27 +271,18 @@ class RemoteDBConnection(DBConnection):
         return RemoteTable(table, self.db_name)

     @override
-    def drop_table(self, name: str, namespace: List[str] = []):
+    def drop_table(self, name: str):
         """Drop a table from the database.

         Parameters
         ----------
         name: str
             The name of the table.
-        namespace: List[str], optional
-            The namespace to drop the table from.
-            None or empty list represents root namespace.
         """
-        LOOP.run(self._conn.drop_table(name, namespace=namespace))
+        LOOP.run(self._conn.drop_table(name))

     @override
-    def rename_table(
-        self,
-        cur_name: str,
-        new_name: str,
-        cur_namespace: List[str] = [],
-        new_namespace: List[str] = [],
-    ):
+    def rename_table(self, cur_name: str, new_name: str):
         """Rename a table in the database.

         Parameters
@@ -374,14 +292,7 @@ class RemoteDBConnection(DBConnection):
         new_name: str
             The new name of the table.
         """
-        LOOP.run(
-            self._conn.rename_table(
-                cur_name,
-                new_name,
-                cur_namespace=cur_namespace,
-                new_namespace=new_namespace,
-            )
-        )
+        LOOP.run(self._conn.rename_table(cur_name, new_name))

     async def close(self):
         """Close the connection to the database."""
@@ -115,7 +115,6 @@ class RemoteTable(Table):
         *,
         replace: bool = False,
         wait_timeout: timedelta = None,
-        name: Optional[str] = None,
     ):
         """Creates a scalar index
         Parameters
@@ -140,11 +139,7 @@ class RemoteTable(Table):

         LOOP.run(
             self._table.create_index(
-                column,
-                config=config,
-                replace=replace,
-                wait_timeout=wait_timeout,
-                name=name,
+                column, config=config, replace=replace, wait_timeout=wait_timeout
             )
         )

@@ -166,7 +161,6 @@ class RemoteTable(Table):
         ngram_min_length: int = 3,
         ngram_max_length: int = 3,
         prefix_only: bool = False,
-        name: Optional[str] = None,
     ):
         config = FTS(
             with_position=with_position,
@@ -183,11 +177,7 @@ class RemoteTable(Table):
         )
         LOOP.run(
             self._table.create_index(
-                column,
-                config=config,
-                replace=replace,
-                wait_timeout=wait_timeout,
-                name=name,
+                column, config=config, replace=replace, wait_timeout=wait_timeout
             )
         )

@@ -204,8 +194,6 @@ class RemoteTable(Table):
         wait_timeout: Optional[timedelta] = None,
         *,
         num_bits: int = 8,
-        name: Optional[str] = None,
-        train: bool = True,
     ):
         """Create an index on the table.
         Currently, the only parameters that matter are
@@ -282,11 +270,7 @@ class RemoteTable(Table):

         LOOP.run(
             self._table.create_index(
-                vector_column_name,
-                config=config,
-                wait_timeout=wait_timeout,
-                name=name,
-                train=train,
+                vector_column_name, config=config, wait_timeout=wait_timeout
             )
         )

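
All four hunks above remove the same capability from the remote table: forwarding a caller-chosen index name to `create_index`. On the beta side the calls looked roughly like this (a sketch; `table` is an open remote table, and without `name` the server falls back to generated defaults such as `id_idx`):

```python
from datetime import timedelta

# Beta-side calls removed by this diff: every index kind accepted an
# explicit `name` keyword.
table.create_scalar_index("id", name="custom_scalar_idx")
table.create_fts_index("text", name="custom_fts_idx")
table.create_index(
    vector_column_name="vector",
    name="custom_vector_idx",
    wait_timeout=timedelta(seconds=10),
)
```
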
@@ -689,8 +689,6 @@ class Table(ABC):
         sample_rate: int = 256,
         m: int = 20,
         ef_construction: int = 300,
-        name: Optional[str] = None,
-        train: bool = True,
     ):
         """Create an index on the table.

@@ -723,11 +721,6 @@ class Table(ABC):
             Only 4 and 8 are supported.
         wait_timeout: timedelta, optional
             The timeout to wait if indexing is asynchronous.
-        name: str, optional
-            The name of the index. If not provided, a default name will be generated.
-        train: bool, default True
-            Whether to train the index with existing data. Vector indices always train
-            with existing data.
         """
         raise NotImplementedError

@@ -783,7 +776,6 @@ class Table(ABC):
         replace: bool = True,
         index_type: ScalarIndexType = "BTREE",
         wait_timeout: Optional[timedelta] = None,
-        name: Optional[str] = None,
     ):
         """Create a scalar index on a column.

@@ -798,8 +790,6 @@ class Table(ABC):
             The type of index to create.
         wait_timeout: timedelta, optional
             The timeout to wait if indexing is asynchronous.
-        name: str, optional
-            The name of the index. If not provided, a default name will be generated.
         Examples
         --------

@@ -862,7 +852,6 @@ class Table(ABC):
         ngram_max_length: int = 3,
         prefix_only: bool = False,
         wait_timeout: Optional[timedelta] = None,
-        name: Optional[str] = None,
     ):
         """Create a full-text search index on the table.

@@ -927,8 +916,6 @@ class Table(ABC):
             Whether to only index the prefix of the token for ngram tokenizer.
         wait_timeout: timedelta, optional
             The timeout to wait if indexing is asynchronous.
-        name: str, optional
-            The name of the index. If not provided, a default name will be generated.
         """
         raise NotImplementedError

@@ -1119,9 +1106,7 @@ class Table(ABC):
         raise NotImplementedError

     @abstractmethod
-    def take_offsets(
-        self, offsets: list[int], *, with_row_id: bool = False
-    ) -> LanceTakeQueryBuilder:
+    def take_offsets(self, offsets: list[int]) -> LanceTakeQueryBuilder:
         """
         Take a list of offsets from the table.

@@ -1147,60 +1132,8 @@ class Table(ABC):
             A record batch containing the rows at the given offsets.
         """

-    def __getitems__(self, offsets: list[int]) -> pa.RecordBatch:
-        """
-        Take a list of offsets from the table and return as a record batch.
-
-        This method uses the `take_offsets` method to take the rows. However, it
-        aligns the offsets to the passed in offsets. This means the return type
-        is a record batch (and so users should take care not to pass in too many
-        offsets)
-
-        Note: this method is primarily intended to fulfill the Dataset contract
-        for pytorch.
-
-        Parameters
-        ----------
-        offsets: list[int]
-            The offsets to take.
-
-        Returns
-        -------
-        pa.RecordBatch
-            A record batch containing the rows at the given offsets.
-        """
-        # We don't know the order of the results at all. So we calculate a permutation
-        # for ordering the given offsets. Then we load the data with the _rowoffset
-        # column. Then we sort by _rowoffset and apply the inverse of the permutation
-        # that we calculated.
-        #
-        # Note: this is potentially a lot of memory copy if we're operating on large
-        # batches :(
-        num_offsets = len(offsets)
-        indices = list(range(num_offsets))
-        permutation = sorted(indices, key=lambda idx: offsets[idx])
-        permutation_inv = [0] * num_offsets
-        for i in range(num_offsets):
-            permutation_inv[permutation[i]] = i
-
-        columns = self.schema.names
-        columns.append("_rowoffset")
-        tbl = (
-            self.take_offsets(offsets)
-            .select(columns)
-            .to_arrow()
-            .sort_by("_rowoffset")
-            .take(permutation_inv)
-            .combine_chunks()
-            .drop_columns(["_rowoffset"])
-        )
-
-        return tbl
-
     @abstractmethod
-    def take_row_ids(
-        self, row_ids: list[int], *, with_row_id: bool = False
-    ) -> LanceTakeQueryBuilder:
+    def take_row_ids(self, row_ids: list[int]) -> LanceTakeQueryBuilder:
         """
         Take a list of row ids from the table.

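
The comment block in the deleted `__getitems__` is worth unpacking: `take_offsets` may return rows in any order, so the method sorts by `_rowoffset` and then applies the inverse of the sort permutation to restore the caller's order. A self-contained sketch of just that reordering trick, independent of LanceDB:

```python
# Demonstration of the permutation-inverse reordering used by the
# deleted __getitems__. `fetched` stands in for rows that came back
# sorted by offset rather than in request order.
offsets = [5, 2, 117]

# Permutation that sorts the requested offsets ascending.
permutation = sorted(range(len(offsets)), key=lambda i: offsets[i])
# Its inverse maps each sorted position back to the requested position.
permutation_inv = [0] * len(offsets)
for i, p in enumerate(permutation):
    permutation_inv[p] = i

fetched = sorted(offsets)  # rows arrive in offset order: [2, 5, 117]
restored = [fetched[j] for j in permutation_inv]
assert restored == offsets  # original request order recovered
```
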
@@ -1706,16 +1639,13 @@ class LanceTable(Table):
         connection: "LanceDBConnection",
         name: str,
         *,
-        namespace: List[str] = [],
         storage_options: Optional[Dict[str, str]] = None,
         index_cache_size: Optional[int] = None,
     ):
         self._conn = connection
-        self._namespace = namespace
         self._table = LOOP.run(
             connection._conn.open_table(
                 name,
-                namespace=namespace,
                 storage_options=storage_options,
                 index_cache_size=index_cache_size,
             )
@@ -1726,8 +1656,8 @@ class LanceTable(Table):
         return self._table.name

     @classmethod
-    def open(cls, db, name, *, namespace: List[str] = [], **kwargs):
-        tbl = cls(db, name, namespace=namespace, **kwargs)
+    def open(cls, db, name, **kwargs):
+        tbl = cls(db, name, **kwargs)

         # check the dataset exists
         try:
@@ -1999,9 +1929,6 @@ class LanceTable(Table):
         sample_rate: int = 256,
         m: int = 20,
         ef_construction: int = 300,
-        *,
-        name: Optional[str] = None,
-        train: bool = True,
     ):
         """Create an index on the table."""
         if accelerator is not None:
@@ -2065,8 +1992,6 @@ class LanceTable(Table):
                 vector_column_name,
                 replace=replace,
                 config=config,
-                name=name,
-                train=train,
             )
         )

@@ -2111,7 +2036,6 @@ class LanceTable(Table):
         *,
         replace: bool = True,
         index_type: ScalarIndexType = "BTREE",
-        name: Optional[str] = None,
     ):
         if index_type == "BTREE":
             config = BTree()
@@ -2122,7 +2046,7 @@ class LanceTable(Table):
         else:
             raise ValueError(f"Unknown index type {index_type}")
         return LOOP.run(
-            self._table.create_index(column, replace=replace, config=config, name=name)
+            self._table.create_index(column, replace=replace, config=config)
         )

     def create_fts_index(
@@ -2146,7 +2070,6 @@ class LanceTable(Table):
         ngram_min_length: int = 3,
         ngram_max_length: int = 3,
         prefix_only: bool = False,
-        name: Optional[str] = None,
     ):
         if not use_tantivy:
             if not isinstance(field_names, str):
@@ -2184,7 +2107,6 @@ class LanceTable(Table):
                     field_names,
                     replace=replace,
                     config=config,
-                    name=name,
                 )
             )
             return
@@ -2551,7 +2473,6 @@ class LanceTable(Table):
         fill_value: float = 0.0,
         embedding_functions: Optional[List[EmbeddingFunctionConfig]] = None,
         *,
-        namespace: List[str] = [],
         storage_options: Optional[Dict[str, str | bool]] = None,
         data_storage_version: Optional[str] = None,
         enable_v2_manifest_paths: Optional[bool] = None,
@@ -2611,7 +2532,6 @@ class LanceTable(Table):
         """
         self = cls.__new__(cls)
         self._conn = db
-        self._namespace = namespace

         if data_storage_version is not None:
             warnings.warn(
@@ -2644,7 +2564,6 @@ class LanceTable(Table):
                 on_bad_vectors=on_bad_vectors,
                 fill_value=fill_value,
                 embedding_functions=embedding_functions,
-                namespace=namespace,
                 storage_options=storage_options,
             )
         )
@@ -3332,8 +3251,6 @@ class AsyncTable:
             Union[IvfFlat, IvfPq, HnswPq, HnswSq, BTree, Bitmap, LabelList, FTS]
         ] = None,
         wait_timeout: Optional[timedelta] = None,
-        name: Optional[str] = None,
-        train: bool = True,
     ):
         """Create an index to speed up queries

@@ -3360,11 +3277,6 @@ class AsyncTable:
             creating an index object.
         wait_timeout: timedelta, optional
             The timeout to wait if indexing is asynchronous.
-        name: str, optional
-            The name of the index. If not provided, a default name will be generated.
-        train: bool, default True
-            Whether to train the index with existing data. Vector indices always train
-            with existing data.
         """
         if config is not None:
             if not isinstance(
@@ -3376,12 +3288,7 @@ class AsyncTable:
                 )
         try:
             await self._inner.create_index(
-                column,
-                index=config,
-                replace=replace,
-                wait_timeout=wait_timeout,
-                name=name,
-                train=train,
+                column, index=config, replace=replace, wait_timeout=wait_timeout
             )
         except ValueError as e:
             if "not support the requested language" in str(e):
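
For contrast with the sync paths above: the async call shape that survives this diff keeps `config`, `replace`, and `wait_timeout` but loses `name` and `train`. A hedged sketch of a caller on the 0.21.3 side; the `lancedb.index.IvfPq` import path and its parameter names are assumptions, not shown in this diff:

```python
from datetime import timedelta

from lancedb.index import IvfPq  # import path assumed


async def build_vector_index(table):
    # 0.21.3 call shape: no `name`, no `train`.
    await table.create_index(
        "vector",
        config=IvfPq(num_partitions=256, num_sub_vectors=16),
        replace=True,
        wait_timeout=timedelta(minutes=5),
    )
```
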
@@ -175,18 +175,6 @@ def test_table_names(tmp_db: lancedb.DBConnection):
     tmp_db.create_table("test3", data=data)
     assert tmp_db.table_names() == ["test1", "test2", "test3"]

-    # Test that page_token and limit can be passed positionally
-    result = list(tmp_db.table_names("test1", 1))  # page_token="test1", limit=1
-    assert result == ["test2"], f"Expected ['test2'], got {result}"
-
-    # Test mixed positional and keyword arguments
-    result = list(tmp_db.table_names("test2", limit=2))
-    assert result == ["test3"], f"Expected ['test3'], got {result}"
-
-    # Test that the namespace parameter can be passed as a keyword
-    result = list(tmp_db.table_names(namespace=[]))
-    assert len(result) == 3
-

 @pytest.mark.asyncio
 async def test_table_names_async(tmp_path):
@@ -740,93 +728,3 @@ def test_bypass_vector_index_sync(tmp_db: lancedb.DBConnection):
         table.search(sample_key).bypass_vector_index().explain_plan(verbose=True)
     )
     assert "KNN" in plan_without_index
-
-
-def test_local_namespace_operations(tmp_path):
-    """Test that local mode namespace operations behave as expected."""
-    # Create a local database connection
-    db = lancedb.connect(tmp_path)
-
-    # Test list_namespaces returns empty list
-    namespaces = list(db.list_namespaces())
-    assert namespaces == []
-
-    # Test list_namespaces with parameters still returns empty list
-    namespaces_with_params = list(
-        db.list_namespaces(namespace=["test"], page_token="token", limit=5)
-    )
-    assert namespaces_with_params == []
-
-
-def test_local_create_namespace_not_supported(tmp_path):
-    """Test that create_namespace is not supported in local mode."""
-    db = lancedb.connect(tmp_path)
-
-    with pytest.raises(
-        NotImplementedError,
-        match="Namespace operations are not supported for listing database",
-    ):
-        db.create_namespace(["test_namespace"])
-
-
-def test_local_drop_namespace_not_supported(tmp_path):
-    """Test that drop_namespace is not supported in local mode."""
-    db = lancedb.connect(tmp_path)
-
-    with pytest.raises(
-        NotImplementedError,
-        match="Namespace operations are not supported for listing database",
-    ):
-        db.drop_namespace(["test_namespace"])
-
-
-def test_local_table_operations_with_namespace_raise_error(tmp_path):
-    """
-    Test that table operations with a namespace parameter
-    raise NotImplementedError in local mode.
-    """
-    db = lancedb.connect(tmp_path)
-
-    # Create some test data
-    data = [{"vector": [1.0, 2.0], "item": "test"}]
-    schema = pa.schema(
-        [pa.field("vector", pa.list_(pa.float32(), 2)), pa.field("item", pa.string())]
-    )
-
-    # Test create_table with namespace - should raise NotImplementedError
-    with pytest.raises(
-        NotImplementedError,
-        match="Namespace parameter is not supported for listing database",
-    ):
-        db.create_table(
-            "test_table_with_ns", data=data, schema=schema, namespace=["test_ns"]
-        )
-
-    # Create table normally for other tests
-    db.create_table("test_table", data=data, schema=schema)
-    assert "test_table" in db.table_names()
-
-    # Test open_table with namespace - should raise NotImplementedError
-    with pytest.raises(
-        NotImplementedError,
-        match="Namespace parameter is not supported for listing database",
-    ):
-        db.open_table("test_table", namespace=["test_ns"])
-
-    # Test table_names with namespace - should raise NotImplementedError
-    with pytest.raises(
-        NotImplementedError,
-        match="Namespace parameter is not supported for listing database",
-    ):
-        list(db.table_names(namespace=["test_ns"]))
-
-    # Test drop_table with namespace - should raise NotImplementedError
-    with pytest.raises(
-        NotImplementedError,
-        match="Namespace parameter is not supported for listing database",
-    ):
-        db.drop_table("test_table", namespace=["test_ns"])
-
-    # Test table_names without namespace - should work normally
-    tables_root = list(db.table_names())
-    assert "test_table" in tables_root
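
The deleted tests above pin down the beta's local-mode contract: a plain directory connection answers `list_namespaces` (always empty) but refuses namespace mutation and namespace-scoped table operations with `NotImplementedError`. A condensed sketch of that contract as the tests describe it (beta behavior only; it is removed along with the tests):

```python
import lancedb

db = lancedb.connect("/tmp/demo-db")  # local, directory-backed connection

# Listing degrades gracefully: a local database has no child namespaces.
assert list(db.list_namespaces()) == []

# Mutation does not: local mode raises NotImplementedError on the beta side.
try:
    db.create_namespace(["sales"])
except NotImplementedError as exc:
    print(f"expected: {exc}")
```
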
@@ -157,16 +157,7 @@ def test_create_index_with_stemming(tmp_path, table):
 def test_create_inverted_index(table, use_tantivy, with_position):
     if use_tantivy and not with_position:
         pytest.skip("we don't support building a tantivy index without position")
-    table.create_fts_index(
-        "text",
-        use_tantivy=use_tantivy,
-        with_position=with_position,
-        name="custom_fts_index",
-    )
-    if not use_tantivy:
-        indices = table.list_indices()
-        fts_indices = [i for i in indices if i.index_type == "FTS"]
-        assert any(i.name == "custom_fts_index" for i in fts_indices)
+    table.create_fts_index("text", use_tantivy=use_tantivy, with_position=with_position)


 def test_populate_index(tmp_path, table):
@@ -1,707 +0,0 @@
|
|||||||
# SPDX-License-Identifier: Apache-2.0
|
|
||||||
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
|
||||||
|
|
||||||
"""Tests for LanceDB namespace integration."""
|
|
||||||
|
|
||||||
import tempfile
|
|
||||||
import shutil
|
|
||||||
from typing import Dict, Optional
|
|
||||||
import pytest
|
|
||||||
import pyarrow as pa
|
|
||||||
import lancedb
|
|
||||||
from lance_namespace.namespace import NATIVE_IMPLS, LanceNamespace
|
|
||||||
from lance_namespace_urllib3_client.models import (
|
|
||||||
ListTablesRequest,
|
|
||||||
ListTablesResponse,
|
|
||||||
DescribeTableRequest,
|
|
||||||
DescribeTableResponse,
|
|
||||||
RegisterTableRequest,
|
|
||||||
RegisterTableResponse,
|
|
||||||
DeregisterTableRequest,
|
|
||||||
DeregisterTableResponse,
|
|
||||||
CreateTableRequest,
|
|
||||||
CreateTableResponse,
|
|
||||||
DropTableRequest,
|
|
||||||
DropTableResponse,
|
|
||||||
ListNamespacesRequest,
|
|
||||||
ListNamespacesResponse,
|
|
||||||
CreateNamespaceRequest,
|
|
||||||
CreateNamespaceResponse,
|
|
||||||
DropNamespaceRequest,
|
|
||||||
DropNamespaceResponse,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class TempNamespace(LanceNamespace):
|
|
||||||
"""A simple dictionary-backed namespace for testing."""
|
|
||||||
|
|
||||||
# Class-level storage to persist table registry across instances
|
|
||||||
_global_registry: Dict[str, Dict[str, str]] = {}
|
|
||||||
# Class-level storage for namespaces (supporting 1-level namespace)
|
|
||||||
_global_namespaces: Dict[str, set] = {}
|
|
||||||
|
|
||||||
def __init__(self, **properties):
|
|
||||||
"""Initialize the test namespace.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
root: The root directory for tables (optional)
|
|
||||||
**properties: Additional configuration properties
|
|
||||||
"""
|
|
||||||
self.config = TempNamespaceConfig(properties)
|
|
||||||
# Use the root as a key to maintain separate registries per root
|
|
||||||
root = self.config.root
|
|
||||||
if root not in self._global_registry:
|
|
||||||
self._global_registry[root] = {}
|
|
||||||
if root not in self._global_namespaces:
|
|
||||||
self._global_namespaces[root] = set()
|
|
||||||
self.tables = self._global_registry[root] # Reference to shared registry
|
|
||||||
self.namespaces = self._global_namespaces[
|
|
||||||
root
|
|
||||||
] # Reference to shared namespaces
|
|
||||||
|
|
||||||
def list_tables(self, request: ListTablesRequest) -> ListTablesResponse:
|
|
||||||
"""List all tables in the namespace."""
|
|
||||||
if not request.id:
|
|
||||||
# List all tables in root namespace
|
|
||||||
tables = [name for name in self.tables.keys() if "." not in name]
|
|
||||||
else:
|
|
||||||
# List tables in specific namespace (1-level only)
|
|
||||||
if len(request.id) == 1:
|
|
||||||
namespace_name = request.id[0]
|
|
||||||
prefix = f"{namespace_name}."
|
|
||||||
tables = [
|
|
||||||
name[len(prefix) :]
|
|
||||||
for name in self.tables.keys()
|
|
||||||
if name.startswith(prefix)
|
|
||||||
]
|
|
||||||
else:
|
|
||||||
# Multi-level namespaces not supported
|
|
||||||
raise ValueError("Only 1-level namespaces are supported")
|
|
||||||
return ListTablesResponse(tables=tables)
|
|
||||||
|
|
||||||
def describe_table(self, request: DescribeTableRequest) -> DescribeTableResponse:
|
|
||||||
"""Describe a table by returning its location."""
|
|
||||||
if not request.id:
|
|
||||||
raise ValueError("Invalid table ID")
|
|
||||||
|
|
||||||
if len(request.id) == 1:
|
|
||||||
# Root namespace table
|
|
||||||
table_name = request.id[0]
|
|
||||||
elif len(request.id) == 2:
|
|
||||||
# Namespaced table (1-level namespace)
|
|
||||||
namespace_name, table_name = request.id
|
|
||||||
table_name = f"{namespace_name}.{table_name}"
|
|
||||||
else:
|
|
||||||
raise ValueError("Only 1-level namespaces are supported")
|
|
||||||
|
|
||||||
if table_name not in self.tables:
|
|
||||||
raise RuntimeError(f"Table does not exist: {table_name}")
|
|
||||||
|
|
||||||
table_uri = self.tables[table_name]
|
|
||||||
return DescribeTableResponse(location=table_uri)
|
|
||||||
|
|
||||||
def create_table(
|
|
||||||
self, request: CreateTableRequest, request_data: bytes
|
|
||||||
) -> CreateTableResponse:
|
|
||||||
"""Create a table in the namespace."""
|
|
||||||
if not request.id:
|
|
||||||
raise ValueError("Invalid table ID")
|
|
||||||
|
|
||||||
if len(request.id) == 1:
|
|
||||||
# Root namespace table
|
|
||||||
table_name = request.id[0]
|
|
||||||
table_uri = f"{self.config.root}/{table_name}.lance"
|
|
||||||
elif len(request.id) == 2:
|
|
||||||
# Namespaced table (1-level namespace)
|
|
||||||
namespace_name, base_table_name = request.id
|
|
||||||
# Add namespace to our namespace set
|
|
||||||
self.namespaces.add(namespace_name)
|
|
||||||
table_name = f"{namespace_name}.{base_table_name}"
|
|
||||||
table_uri = f"{self.config.root}/{namespace_name}/{base_table_name}.lance"
|
|
||||||
else:
|
|
||||||
raise ValueError("Only 1-level namespaces are supported")
|
|
||||||
|
|
||||||
# Check if table already exists
|
|
||||||
if table_name in self.tables:
|
|
||||||
if request.mode == "overwrite":
|
|
||||||
# Drop existing table for overwrite mode
|
|
||||||
del self.tables[table_name]
|
|
||||||
else:
|
|
||||||
raise RuntimeError(f"Table already exists: {table_name}")
|
|
||||||
|
|
||||||
# Parse the Arrow IPC stream to get the schema and create the actual table
|
|
||||||
import pyarrow.ipc as ipc
|
|
||||||
import io
|
|
||||||
import lance
|
|
||||||
import os
|
|
||||||
|
|
||||||
# Create directory if needed for namespaced tables
|
|
||||||
os.makedirs(os.path.dirname(table_uri), exist_ok=True)
|
|
||||||
|
|
||||||
# Read the IPC stream
|
|
||||||
reader = ipc.open_stream(io.BytesIO(request_data))
|
|
||||||
table = reader.read_all()
|
|
||||||
|
|
||||||
# Create the actual Lance table
|
|
||||||
lance.write_dataset(table, table_uri)
|
|
||||||
|
|
||||||
# Store the table mapping
|
|
||||||
self.tables[table_name] = table_uri
|
|
||||||
|
|
||||||
return CreateTableResponse(location=table_uri)
|
|
||||||
|
|
||||||
def drop_table(self, request: DropTableRequest) -> DropTableResponse:
|
|
||||||
"""Drop a table from the namespace."""
|
|
||||||
if not request.id:
|
|
||||||
raise ValueError("Invalid table ID")
|
|
||||||
|
|
||||||
if len(request.id) == 1:
|
|
||||||
# Root namespace table
|
|
||||||
table_name = request.id[0]
|
|
||||||
elif len(request.id) == 2:
|
|
||||||
# Namespaced table (1-level namespace)
|
|
||||||
namespace_name, base_table_name = request.id
|
|
||||||
table_name = f"{namespace_name}.{base_table_name}"
|
|
||||||
else:
|
|
||||||
raise ValueError("Only 1-level namespaces are supported")
|
|
||||||
|
|
||||||
if table_name not in self.tables:
|
|
||||||
raise RuntimeError(f"Table does not exist: {table_name}")
|
|
||||||
|
|
||||||
# Get the table URI
|
|
||||||
table_uri = self.tables[table_name]
|
|
||||||
|
|
||||||
# Delete the actual table files
|
|
||||||
import shutil
|
|
||||||
import os
|
|
||||||
|
|
||||||
if os.path.exists(table_uri):
|
|
||||||
shutil.rmtree(table_uri, ignore_errors=True)
|
|
||||||
|
|
||||||
# Remove from registry
|
|
||||||
del self.tables[table_name]
|
|
||||||
|
|
||||||
return DropTableResponse()
|
|
||||||
|
|
||||||
def register_table(self, request: RegisterTableRequest) -> RegisterTableResponse:
|
|
||||||
"""Register a table with the namespace."""
|
|
||||||
if not request.id or len(request.id) != 1:
|
|
||||||
raise ValueError("Invalid table ID")
|
|
||||||
|
|
||||||
if not request.location:
|
|
||||||
raise ValueError("Table location is required")
|
|
||||||
|
|
||||||
table_name = request.id[0]
|
|
||||||
self.tables[table_name] = request.location
|
|
||||||
|
|
||||||
return RegisterTableResponse()
|
|
||||||
|
|
||||||
def deregister_table(
|
|
||||||
self, request: DeregisterTableRequest
|
|
||||||
) -> DeregisterTableResponse:
|
|
||||||
"""Deregister a table from the namespace."""
|
|
||||||
if not request.id or len(request.id) != 1:
|
|
||||||
raise ValueError("Invalid table ID")
|
|
||||||
|
|
||||||
table_name = request.id[0]
|
|
||||||
if table_name not in self.tables:
|
|
||||||
raise RuntimeError(f"Table does not exist: {table_name}")
|
|
||||||
|
|
||||||
del self.tables[table_name]
|
|
||||||
return DeregisterTableResponse()
|
|
||||||
|
|
||||||
def list_namespaces(self, request: ListNamespacesRequest) -> ListNamespacesResponse:
|
|
||||||
"""List child namespaces."""
|
|
||||||
if not request.id:
|
|
||||||
# List root-level namespaces
|
|
||||||
namespaces = list(self.namespaces)
|
|
||||||
elif len(request.id) == 1:
|
|
||||||
# For 1-level namespace, there are no child namespaces
|
|
||||||
namespaces = []
|
|
||||||
else:
|
|
||||||
raise ValueError("Only 1-level namespaces are supported")
|
|
||||||
|
|
||||||
return ListNamespacesResponse(namespaces=namespaces)
|
|
||||||
|
|
||||||
def create_namespace(
|
|
||||||
self, request: CreateNamespaceRequest
|
|
||||||
) -> CreateNamespaceResponse:
|
|
||||||
"""Create a namespace."""
|
|
||||||
if not request.id:
|
|
||||||
raise ValueError("Invalid namespace ID")
|
|
||||||
|
|
||||||
if len(request.id) == 1:
|
|
||||||
# Create 1-level namespace
|
|
||||||
namespace_name = request.id[0]
|
|
||||||
self.namespaces.add(namespace_name)
|
|
||||||
|
|
||||||
# Create directory for the namespace
|
|
||||||
import os
|
|
||||||
|
|
||||||
namespace_dir = f"{self.config.root}/{namespace_name}"
|
|
||||||
os.makedirs(namespace_dir, exist_ok=True)
|
|
||||||
else:
|
|
||||||
raise ValueError("Only 1-level namespaces are supported")
|
|
||||||
|
|
||||||
return CreateNamespaceResponse()
|
|
||||||
|
|
||||||
def drop_namespace(self, request: DropNamespaceRequest) -> DropNamespaceResponse:
|
|
||||||
"""Drop a namespace."""
|
|
||||||
if not request.id:
|
|
||||||
raise ValueError("Invalid namespace ID")
|
|
||||||
|
|
||||||
if len(request.id) == 1:
|
|
||||||
# Drop 1-level namespace
|
|
||||||
namespace_name = request.id[0]
|
|
||||||
|
|
||||||
if namespace_name not in self.namespaces:
|
|
||||||
raise RuntimeError(f"Namespace does not exist: {namespace_name}")
|
|
||||||
|
|
||||||
# Check if namespace has any tables
|
|
||||||
prefix = f"{namespace_name}."
|
|
||||||
tables_in_namespace = [
|
|
||||||
name for name in self.tables.keys() if name.startswith(prefix)
|
|
||||||
]
|
|
||||||
if tables_in_namespace:
|
|
||||||
raise RuntimeError(
|
|
||||||
f"Cannot drop namespace '{namespace_name}': contains tables"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Remove namespace
|
|
||||||
self.namespaces.remove(namespace_name)
|
|
||||||
|
|
||||||
# Remove directory
|
|
||||||
import shutil
|
|
||||||
import os
|
|
||||||
|
|
||||||
namespace_dir = f"{self.config.root}/{namespace_name}"
|
|
||||||
if os.path.exists(namespace_dir):
|
|
||||||
shutil.rmtree(namespace_dir, ignore_errors=True)
|
|
||||||
else:
|
|
||||||
raise ValueError("Only 1-level namespaces are supported")
|
|
||||||
|
|
||||||
return DropNamespaceResponse()
|
|
||||||
|
|
||||||
|
|
||||||
class TempNamespaceConfig:
|
|
||||||
"""Configuration for TestNamespace."""
|
|
||||||
|
|
||||||
ROOT = "root"
|
|
||||||
|
|
||||||
def __init__(self, properties: Optional[Dict[str, str]] = None):
|
|
||||||
"""Initialize configuration from properties.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
properties: Dictionary of configuration properties
|
|
||||||
"""
|
|
||||||
if properties is None:
|
|
||||||
properties = {}
|
|
||||||
|
|
||||||
self._root = properties.get(self.ROOT, "/tmp")
|
|
||||||
|
|
||||||
@property
|
|
||||||
def root(self) -> str:
|
|
||||||
"""Get the namespace root directory."""
|
|
||||||
return self._root
|
|
||||||
|
|
||||||
|
|
||||||
NATIVE_IMPLS["temp"] = f"{TempNamespace.__module__}.TempNamespace"
|
|
||||||
|
|
||||||
|
|
||||||
class TestNamespaceConnection:
|
|
||||||
"""Test namespace-based LanceDB connection."""
|
|
||||||
|
|
||||||
def setup_method(self):
|
|
||||||
"""Set up test fixtures."""
|
|
||||||
self.temp_dir = tempfile.mkdtemp()
|
|
||||||
# Clear the TestNamespace registry for this test
|
|
||||||
if self.temp_dir in TempNamespace._global_registry:
|
|
||||||
TempNamespace._global_registry[self.temp_dir].clear()
|
|
||||||
if self.temp_dir in TempNamespace._global_namespaces:
|
|
||||||
TempNamespace._global_namespaces[self.temp_dir].clear()
|
|
||||||
|
|
||||||
def teardown_method(self):
|
|
||||||
"""Clean up test fixtures."""
|
|
||||||
# Clear the TestNamespace registry
|
|
||||||
if self.temp_dir in TempNamespace._global_registry:
|
|
||||||
del TempNamespace._global_registry[self.temp_dir]
|
|
||||||
if self.temp_dir in TempNamespace._global_namespaces:
|
|
||||||
del TempNamespace._global_namespaces[self.temp_dir]
|
|
||||||
shutil.rmtree(self.temp_dir, ignore_errors=True)
|
|
||||||
|
|
||||||
def test_connect_namespace_test(self):
|
|
||||||
"""Test connecting to LanceDB through TestNamespace."""
|
|
||||||
# Connect using TestNamespace
|
|
||||||
db = lancedb.connect_namespace("temp", {"root": self.temp_dir})
|
|
||||||
|
|
||||||
# Should be a LanceNamespaceDBConnection
|
|
||||||
assert isinstance(db, lancedb.LanceNamespaceDBConnection)
|
|
||||||
|
|
||||||
# Initially no tables
|
|
||||||
assert len(list(db.table_names())) == 0
|
|
||||||
|
|
||||||
def test_create_table_through_namespace(self):
|
|
||||||
"""Test creating a table through namespace."""
|
|
||||||
db = lancedb.connect_namespace("temp", {"root": self.temp_dir})
|
|
||||||
|
|
||||||
# Define schema for empty table
|
|
||||||
schema = pa.schema(
|
|
||||||
[
|
|
||||||
pa.field("id", pa.int64()),
|
|
||||||
pa.field("vector", pa.list_(pa.float32(), 2)),
|
|
||||||
pa.field("text", pa.string()),
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
# Create empty table
|
|
||||||
table = db.create_table("test_table", schema=schema)
|
|
||||||
assert table is not None
|
|
||||||
assert table.name == "test_table"
|
|
||||||
|
|
||||||
# Table should appear in namespace
|
|
||||||
table_names = list(db.table_names())
|
|
||||||
assert "test_table" in table_names
|
|
||||||
assert len(table_names) == 1
|
|
||||||
|
|
||||||
# Verify empty table
|
|
||||||
result = table.to_pandas()
|
|
||||||
assert len(result) == 0
|
|
||||||
assert list(result.columns) == ["id", "vector", "text"]
|
|
||||||
|
|
||||||
def test_open_table_through_namespace(self):
|
|
||||||
"""Test opening an existing table through namespace."""
|
|
||||||
db = lancedb.connect_namespace("temp", {"root": self.temp_dir})
|
|
||||||
|
|
||||||
# Create a table with schema
|
|
||||||
schema = pa.schema(
|
|
||||||
[
|
|
||||||
pa.field("id", pa.int64()),
|
|
||||||
pa.field("vector", pa.list_(pa.float32(), 2)),
|
|
||||||
]
|
|
||||||
)
|
|
||||||
db.create_table("test_table", schema=schema)
|
|
||||||
|
|
||||||
# Open the table
|
|
||||||
table = db.open_table("test_table")
|
|
||||||
assert table is not None
|
|
||||||
assert table.name == "test_table"
|
|
||||||
|
|
||||||
# Verify empty table with correct schema
|
|
||||||
result = table.to_pandas()
|
|
||||||
assert len(result) == 0
|
|
||||||
assert list(result.columns) == ["id", "vector"]
|
|
||||||
|
|
||||||
def test_drop_table_through_namespace(self):
|
|
||||||
"""Test dropping a table through namespace."""
|
|
||||||
db = lancedb.connect_namespace("temp", {"root": self.temp_dir})
|
|
||||||
|
|
||||||
# Create tables
|
|
||||||
schema = pa.schema(
|
|
||||||
[
|
|
||||||
pa.field("id", pa.int64()),
|
|
||||||
pa.field("vector", pa.list_(pa.float32(), 2)),
|
|
||||||
]
|
|
||||||
)
|
|
||||||
db.create_table("table1", schema=schema)
|
|
||||||
db.create_table("table2", schema=schema)
|
|
||||||
|
|
||||||
# Verify both tables exist
|
|
||||||
table_names = list(db.table_names())
|
|
||||||
assert "table1" in table_names
|
|
||||||
assert "table2" in table_names
|
|
||||||
assert len(table_names) == 2
|
|
||||||
|
|
||||||
# Drop one table
|
|
||||||
db.drop_table("table1")
|
|
||||||
|
|
||||||
# Verify only table2 remains
|
|
||||||
table_names = list(db.table_names())
|
|
||||||
assert "table1" not in table_names
|
|
||||||
assert "table2" in table_names
|
|
||||||
assert len(table_names) == 1
|
|
||||||
|
|
||||||
# Test that drop_table works without explicit namespace parameter
|
|
||||||
db.drop_table("table2")
|
|
||||||
assert len(list(db.table_names())) == 0
|
|
||||||
|
|
||||||
# Should not be able to open dropped table
|
|
||||||
with pytest.raises(RuntimeError):
|
|
||||||
db.open_table("table1")
|
|
||||||
|
|
||||||
def test_create_table_with_schema(self):
|
|
||||||
"""Test creating a table with explicit schema through namespace."""
|
|
||||||
db = lancedb.connect_namespace("temp", {"root": self.temp_dir})
|
|
||||||
|
|
||||||
# Define schema
|
|
||||||
schema = pa.schema(
|
|
||||||
[
|
|
||||||
pa.field("id", pa.int64()),
|
|
||||||
pa.field("vector", pa.list_(pa.float32(), 3)),
|
|
||||||
pa.field("text", pa.string()),
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
# Create table with schema
|
|
||||||
table = db.create_table("test_table", schema=schema)
|
|
||||||
assert table is not None
|
|
||||||
|
|
||||||
# Verify schema
|
|
||||||
table_schema = table.schema
|
|
||||||
assert len(table_schema) == 3
|
|
||||||
assert table_schema.field("id").type == pa.int64()
|
|
||||||
assert table_schema.field("text").type == pa.string()
|
|
||||||
|
|
||||||
def test_rename_table_not_supported(self):
|
|
||||||
"""Test that rename_table raises NotImplementedError."""
|
|
||||||
db = lancedb.connect_namespace("temp", {"root": self.temp_dir})
|
|
||||||
|
|
||||||
# Create a table
|
|
||||||
schema = pa.schema(
|
|
||||||
[
|
|
||||||
pa.field("id", pa.int64()),
|
|
||||||
pa.field("vector", pa.list_(pa.float32(), 2)),
|
|
||||||
]
|
|
||||||
)
|
|
||||||
db.create_table("old_name", schema=schema)
|
|
||||||
|
|
||||||
# Rename should raise NotImplementedError
|
|
||||||
with pytest.raises(NotImplementedError, match="rename_table is not supported"):
|
|
||||||
db.rename_table("old_name", "new_name")
|
|
||||||
|
|
||||||
def test_drop_all_tables(self):
|
|
||||||
"""Test dropping all tables through namespace."""
|
|
||||||
db = lancedb.connect_namespace("temp", {"root": self.temp_dir})
|
|
||||||
|
|
||||||
# Create multiple tables
|
|
||||||
schema = pa.schema(
|
|
||||||
[
|
|
||||||
pa.field("id", pa.int64()),
|
|
||||||
pa.field("vector", pa.list_(pa.float32(), 2)),
|
|
||||||
]
|
|
||||||
)
|
|
||||||
for i in range(3):
|
|
||||||
db.create_table(f"table{i}", schema=schema)
|
|
||||||
|
|
||||||
# Verify tables exist
|
|
||||||
assert len(list(db.table_names())) == 3
|
|
||||||
|
|
||||||
# Drop all tables
|
|
||||||
db.drop_all_tables()
|
|
||||||
|
|
||||||
# Verify all tables are gone
|
|
||||||
assert len(list(db.table_names())) == 0
|
|
||||||
|
|
||||||
# Test that table_names works with keyword-only namespace parameter
|
|
||||||
db.create_table("test_table", schema=schema)
|
|
||||||
result = list(db.table_names(namespace=[]))
|
|
||||||
assert "test_table" in result
|
|
||||||
|
|
||||||
def test_table_operations(self):
|
|
||||||
"""Test various table operations through namespace."""
|
|
||||||
db = lancedb.connect_namespace("temp", {"root": self.temp_dir})
|
|
||||||
|
|
||||||
# Create a table with schema
|
|
||||||
schema = pa.schema(
|
|
||||||
[
|
|
||||||
pa.field("id", pa.int64()),
|
|
||||||
pa.field("vector", pa.list_(pa.float32(), 2)),
|
|
||||||
pa.field("text", pa.string()),
|
|
||||||
]
|
|
||||||
)
|
|
||||||
table = db.create_table("test_table", schema=schema)
|
|
||||||
|
|
||||||
# Verify empty table was created
|
|
||||||
result = table.to_pandas()
|
|
||||||
assert len(result) == 0
|
|
||||||
assert list(result.columns) == ["id", "vector", "text"]
|
|
||||||
|
|
||||||
# Test add data to the table
|
|
||||||
new_data = [
|
|
||||||
{"id": 1, "vector": [1.0, 2.0], "text": "item_1"},
|
|
||||||
{"id": 2, "vector": [2.0, 3.0], "text": "item_2"},
|
|
||||||
]
|
|
||||||
table.add(new_data)
|
|
||||||
result = table.to_pandas()
|
|
||||||
assert len(result) == 2
|
|
||||||
|
|
||||||
# Test delete
|
|
||||||
table.delete("id = 1")
|
|
||||||
result = table.to_pandas()
|
|
||||||
assert len(result) == 1
|
|
||||||
assert result["id"].values[0] == 2
|
|
||||||
|
|
||||||
# Test update
|
|
||||||
table.update(where="id = 2", values={"text": "updated"})
|
|
||||||
result = table.to_pandas()
|
|
||||||
assert result["text"].values[0] == "updated"
|
|
||||||
|
|
||||||
def test_storage_options(self):
|
|
||||||
"""Test passing storage options through namespace connection."""
|
|
||||||
# Connect with storage options
|
|
||||||
storage_opts = {"test_option": "test_value"}
|
|
||||||
db = lancedb.connect_namespace(
|
|
||||||
"temp", {"root": self.temp_dir}, storage_options=storage_opts
|
|
||||||
)
|
|
||||||
|
|
||||||
# Storage options should be preserved
|
|
||||||
assert db.storage_options == storage_opts
|
|
||||||
|
|
||||||
# Create table with additional storage options
|
|
||||||
table_opts = {"table_option": "table_value"}
|
|
||||||
schema = pa.schema(
|
|
||||||
[
|
|
||||||
pa.field("id", pa.int64()),
|
|
||||||
pa.field("vector", pa.list_(pa.float32(), 2)),
|
|
||||||
]
|
|
||||||
)
|
|
||||||
db.create_table("test_table", schema=schema, storage_options=table_opts)
|
|
||||||
|
|
||||||
def test_namespace_operations(self):
|
|
||||||
"""Test namespace management operations."""
|
|
||||||
db = lancedb.connect_namespace("temp", {"root": self.temp_dir})
|
|
||||||
|
|
||||||
# Initially no namespaces
|
|
||||||
assert len(list(db.list_namespaces())) == 0
|
|
||||||
|
|
||||||
# Create a namespace
|
|
||||||
db.create_namespace(["test_namespace"])
|
|
||||||
|
|
||||||
# Verify namespace exists
|
|
||||||
namespaces = list(db.list_namespaces())
|
|
||||||
assert "test_namespace" in namespaces
|
|
||||||
assert len(namespaces) == 1
|
|
||||||
|
|
||||||
# Create table in namespace
|
|
||||||
schema = pa.schema(
|
|
||||||
[
|
|
||||||
pa.field("id", pa.int64()),
|
|
||||||
pa.field("vector", pa.list_(pa.float32(), 2)),
|
|
||||||
]
|
|
||||||
)
|
|
||||||
table = db.create_table(
|
|
||||||
"test_table", schema=schema, namespace=["test_namespace"]
|
|
||||||
)
|
|
||||||
assert table is not None
|
|
||||||
|
|
||||||
# Verify table exists in namespace
|
|
||||||
tables_in_namespace = list(db.table_names(namespace=["test_namespace"]))
|
|
||||||
assert "test_table" in tables_in_namespace
|
|
||||||
assert len(tables_in_namespace) == 1
|
|
||||||
|
|
||||||
# Open table from namespace
|
|
||||||
table = db.open_table("test_table", namespace=["test_namespace"])
|
|
||||||
assert table is not None
|
|
||||||
assert table.name == "test_table"
|
|
||||||
|
|
||||||
# Drop table from namespace
|
|
||||||
db.drop_table("test_table", namespace=["test_namespace"])
|
|
||||||
|
|
||||||
# Verify table no longer exists in namespace
|
|
||||||
tables_in_namespace = list(db.table_names(namespace=["test_namespace"]))
|
|
||||||
assert len(tables_in_namespace) == 0
|
|
||||||
|
|
||||||
# Drop namespace
|
|
||||||
db.drop_namespace(["test_namespace"])
|
|
||||||
|
|
||||||
# Verify namespace no longer exists
|
|
||||||
namespaces = list(db.list_namespaces())
|
|
||||||
assert len(namespaces) == 0
|
|
||||||
|
|
||||||
def test_namespace_with_tables_cannot_be_dropped(self):
|
|
||||||
"""Test that namespaces containing tables cannot be dropped."""
|
|
||||||
db = lancedb.connect_namespace("temp", {"root": self.temp_dir})
|
|
||||||
|
|
||||||
# Create namespace and table
|
|
||||||
db.create_namespace(["test_namespace"])
|
|
||||||
schema = pa.schema(
|
|
||||||
[
|
|
||||||
pa.field("id", pa.int64()),
|
|
||||||
pa.field("vector", pa.list_(pa.float32(), 2)),
|
|
||||||
]
|
|
||||||
)
|
|
||||||
db.create_table("test_table", schema=schema, namespace=["test_namespace"])
|
|
||||||
|
|
||||||
# Try to drop namespace with tables - should fail
|
|
||||||
with pytest.raises(RuntimeError, match="contains tables"):
|
|
||||||
db.drop_namespace(["test_namespace"])
|
|
||||||
|
|
||||||
# Drop table first
|
|
||||||
db.drop_table("test_table", namespace=["test_namespace"])
|
|
||||||
|
|
||||||
# Now dropping namespace should work
|
|
||||||
db.drop_namespace(["test_namespace"])
|
|
||||||
|
|
||||||
def test_same_table_name_different_namespaces(self):
|
|
||||||
db = lancedb.connect_namespace("temp", {"root": self.temp_dir})
|
|
||||||
|
|
||||||
# Create two namespaces
|
|
||||||
db.create_namespace(["namespace_a"])
|
|
||||||
db.create_namespace(["namespace_b"])
|
|
||||||
|
|
||||||
# Define schema
|
|
||||||
schema = pa.schema(
|
|
||||||
[
|
|
||||||
pa.field("id", pa.int64()),
|
|
||||||
pa.field("vector", pa.list_(pa.float32(), 2)),
|
|
||||||
pa.field("text", pa.string()),
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
# Create table with same name in both namespaces
|
|
||||||
table_a = db.create_table(
|
|
||||||
"same_name_table", schema=schema, namespace=["namespace_a"]
|
|
||||||
)
|
|
||||||
table_b = db.create_table(
|
|
||||||
"same_name_table", schema=schema, namespace=["namespace_b"]
|
|
||||||
)
|
|
||||||
|
|
||||||
# Add different data to each table
|
|
||||||
data_a = [
|
|
||||||
{"id": 1, "vector": [1.0, 2.0], "text": "data_from_namespace_a"},
|
|
||||||
{"id": 2, "vector": [3.0, 4.0], "text": "also_from_namespace_a"},
|
|
||||||
]
|
|
||||||
table_a.add(data_a)
|
|
||||||
|
|
||||||
data_b = [
|
|
||||||
{"id": 10, "vector": [10.0, 20.0], "text": "data_from_namespace_b"},
|
|
||||||
{"id": 20, "vector": [30.0, 40.0], "text": "also_from_namespace_b"},
|
|
||||||
{"id": 30, "vector": [50.0, 60.0], "text": "more_from_namespace_b"},
|
|
||||||
]
|
|
||||||
table_b.add(data_b)
|
|
||||||
|
|
||||||
# Verify data in namespace_a table
|
|
||||||
opened_table_a = db.open_table("same_name_table", namespace=["namespace_a"])
|
|
||||||
result_a = opened_table_a.to_pandas().sort_values("id").reset_index(drop=True)
|
|
||||||
assert len(result_a) == 2
|
|
||||||
assert result_a["id"].tolist() == [1, 2]
|
|
||||||
assert result_a["text"].tolist() == [
|
|
||||||
"data_from_namespace_a",
|
|
||||||
"also_from_namespace_a",
|
|
||||||
]
|
|
||||||
assert [v.tolist() for v in result_a["vector"]] == [[1.0, 2.0], [3.0, 4.0]]
|
|
||||||
|
|
||||||
# Verify data in namespace_b table
|
|
||||||
opened_table_b = db.open_table("same_name_table", namespace=["namespace_b"])
|
|
||||||
result_b = opened_table_b.to_pandas().sort_values("id").reset_index(drop=True)
|
|
||||||
assert len(result_b) == 3
|
|
||||||
assert result_b["id"].tolist() == [10, 20, 30]
|
|
||||||
assert result_b["text"].tolist() == [
|
|
||||||
"data_from_namespace_b",
|
|
||||||
"also_from_namespace_b",
|
|
||||||
"more_from_namespace_b",
|
|
||||||
]
|
|
||||||
assert [v.tolist() for v in result_b["vector"]] == [
|
|
||||||
[10.0, 20.0],
|
|
||||||
[30.0, 40.0],
|
|
||||||
[50.0, 60.0],
|
|
||||||
]
|
|
||||||
|
|
||||||
# Verify root namespace doesn't have this table
|
|
||||||
root_tables = list(db.table_names())
|
|
||||||
assert "same_name_table" not in root_tables
|
|
||||||
|
|
||||||
# Clean up
|
|
||||||
db.drop_table("same_name_table", namespace=["namespace_a"])
|
|
||||||
db.drop_table("same_name_table", namespace=["namespace_b"])
|
|
||||||
db.drop_namespace(["namespace_a"])
|
|
||||||
db.drop_namespace(["namespace_b"])
|
|
||||||
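
Beyond its assertions, the deleted file doubled as a worked example of plugging a custom `LanceNamespace` implementation into `lancedb.connect_namespace`. The essential wiring, condensed from the deleted code (beta-only API; the module path in the registration line is an assumption for illustration):

```python
import pyarrow as pa

import lancedb
from lance_namespace.namespace import NATIVE_IMPLS

# Registration pattern from the deleted file: map an implementation name
# to "<module>.<class>" so connect_namespace("temp", ...) can resolve it.
NATIVE_IMPLS["temp"] = "tests.test_namespace.TempNamespace"  # path assumed

db = lancedb.connect_namespace("temp", {"root": "/tmp/ns-root"})
schema = pa.schema([pa.field("id", pa.int64())])
db.create_table("items", schema=schema)
assert "items" in list(db.table_names())
```
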
@@ -5,7 +5,6 @@ from typing import List, Union
 import unittest.mock as mock
 from datetime import timedelta
 from pathlib import Path
-import random

 import lancedb
 from lancedb.db import AsyncConnection
@@ -1356,27 +1355,6 @@ def test_take_queries(tmp_path):
     ]


-def test_getitems(tmp_path):
-    db = lancedb.connect(tmp_path)
-    data = pa.table(
-        {
-            "idx": range(100),
-        }
-    )
-    # Make two fragments
-    table = db.create_table("test", data)
-    table.add(pa.table({"idx": range(100, 200)}))
-
-    assert table.__getitems__([5, 2, 117]) == pa.table(
-        {
-            "idx": [5, 2, 117],
-        }
-    )
-
-    offsets = random.sample(range(200), 10)
-    assert table.__getitems__(offsets) == pa.table({"idx": offsets})
-
-
 @pytest.mark.asyncio
 async def test_query_timeout_async(tmp_path):
     db = await lancedb.connect_async(tmp_path)
@@ -271,21 +271,12 @@ def test_table_add_in_threadpool():
 
 
 def test_table_create_indices():
-    # Track received index creation requests to validate name parameter
-    received_requests = []
-
     def handler(request):
         index_stats = dict(
             index_type="IVF_PQ", num_indexed_rows=1000, num_unindexed_rows=0
         )
 
         if request.path == "/v1/table/test/create_index/":
-            # Capture the request body to validate name parameter
-            content_len = int(request.headers.get("Content-Length", 0))
-            if content_len > 0:
-                body = request.rfile.read(content_len)
-                body_data = json.loads(body)
-                received_requests.append(body_data)
             request.send_response(200)
             request.end_headers()
         elif request.path == "/v1/table/test/create/?mode=create":
@@ -316,34 +307,34 @@ def test_table_create_indices():
                 dict(
                     indexes=[
                         {
-                            "index_name": "custom_scalar_idx",
+                            "index_name": "id_idx",
                             "columns": ["id"],
                         },
                         {
-                            "index_name": "custom_fts_idx",
+                            "index_name": "text_idx",
                             "columns": ["text"],
                         },
                         {
-                            "index_name": "custom_vector_idx",
+                            "index_name": "vector_idx",
                             "columns": ["vector"],
                         },
                     ]
                 )
             )
             request.wfile.write(payload.encode())
-        elif request.path == "/v1/table/test/index/custom_scalar_idx/stats/":
+        elif request.path == "/v1/table/test/index/id_idx/stats/":
             request.send_response(200)
             request.send_header("Content-Type", "application/json")
             request.end_headers()
             payload = json.dumps(index_stats)
             request.wfile.write(payload.encode())
-        elif request.path == "/v1/table/test/index/custom_fts_idx/stats/":
+        elif request.path == "/v1/table/test/index/text_idx/stats/":
             request.send_response(200)
             request.send_header("Content-Type", "application/json")
             request.end_headers()
             payload = json.dumps(index_stats)
             request.wfile.write(payload.encode())
-        elif request.path == "/v1/table/test/index/custom_vector_idx/stats/":
+        elif request.path == "/v1/table/test/index/vector_idx/stats/":
             request.send_response(200)
             request.send_header("Content-Type", "application/json")
             request.end_headers()
@@ -360,49 +351,16 @@ def test_table_create_indices():
         # Parameters are well-tested through local and async tests.
         # This is a smoke-test.
         table = db.create_table("test", [{"id": 1}])
-
-        # Test create_scalar_index with custom name
-        table.create_scalar_index(
-            "id", wait_timeout=timedelta(seconds=2), name="custom_scalar_idx"
-        )
-
-        # Test create_fts_index with custom name
-        table.create_fts_index(
-            "text", wait_timeout=timedelta(seconds=2), name="custom_fts_idx"
-        )
-
-        # Test create_index with custom name
+        table.create_scalar_index("id", wait_timeout=timedelta(seconds=2))
+        table.create_fts_index("text", wait_timeout=timedelta(seconds=2))
         table.create_index(
-            vector_column_name="vector",
-            wait_timeout=timedelta(seconds=10),
-            name="custom_vector_idx",
+            vector_column_name="vector", wait_timeout=timedelta(seconds=10)
         )
-
-        # Validate that the name parameter was passed correctly in requests
-        assert len(received_requests) == 3
-
-        # Check scalar index request has custom name
-        scalar_req = received_requests[0]
-        assert "name" in scalar_req
-        assert scalar_req["name"] == "custom_scalar_idx"
-
-        # Check FTS index request has custom name
-        fts_req = received_requests[1]
-        assert "name" in fts_req
-        assert fts_req["name"] == "custom_fts_idx"
-
-        # Check vector index request has custom name
-        vector_req = received_requests[2]
-        assert "name" in vector_req
-        assert vector_req["name"] == "custom_vector_idx"
-
-        table.wait_for_index(["custom_scalar_idx"], timedelta(seconds=2))
-        table.wait_for_index(
-            ["custom_fts_idx", "custom_vector_idx"], timedelta(seconds=2)
-        )
-        table.drop_index("custom_vector_idx")
-        table.drop_index("custom_scalar_idx")
-        table.drop_index("custom_fts_idx")
+        table.wait_for_index(["id_idx"], timedelta(seconds=2))
+        table.wait_for_index(["text_idx", "vector_idx"], timedelta(seconds=2))
+        table.drop_index("vector_idx")
+        table.drop_index("id_idx")
+        table.drop_index("text_idx")
 
 
 def test_table_wait_for_index_timeout():
@@ -670,9 +670,7 @@ def test_create_index_method(mock_create_index, mem_db: DBConnection):
         num_sub_vectors=96,
         num_bits=4,
     )
-    mock_create_index.assert_called_with(
-        "vector", replace=True, config=expected_config, name=None, train=True
-    )
+    mock_create_index.assert_called_with("vector", replace=True, config=expected_config)
 
     table.create_index(
         vector_column_name="my_vector",
@@ -682,7 +680,7 @@ def test_create_index_method(mock_create_index, mem_db: DBConnection):
     )
     expected_config = HnswPq(distance_type="dot")
     mock_create_index.assert_called_with(
-        "my_vector", replace=False, config=expected_config, name=None, train=True
+        "my_vector", replace=False, config=expected_config
     )
 
     table.create_index(
@@ -697,44 +695,7 @@ def test_create_index_method(mock_create_index, mem_db: DBConnection):
         distance_type="cosine", sample_rate=0.1, m=29, ef_construction=10
     )
     mock_create_index.assert_called_with(
-        "my_vector", replace=True, config=expected_config, name=None, train=True
-    )
-
-
-@patch("lancedb.table.AsyncTable.create_index")
-def test_create_index_name_and_train_parameters(
-    mock_create_index, mem_db: DBConnection
-):
-    """Test that name and train parameters are passed correctly to AsyncTable"""
-    table = mem_db.create_table(
-        "test",
-        data=[
-            {"vector": [3.1, 4.1], "id": 1},
-            {"vector": [5.9, 26.5], "id": 2},
-        ],
-    )
-
-    # Test with custom name
-    table.create_index(vector_column_name="vector", name="my_custom_index")
-    expected_config = IvfPq()  # Default config
-    mock_create_index.assert_called_with(
-        "vector",
-        replace=True,
-        config=expected_config,
-        name="my_custom_index",
-        train=True,
-    )
-
-    # Test with train=False
-    table.create_index(vector_column_name="vector", train=False)
-    mock_create_index.assert_called_with(
-        "vector", replace=True, config=expected_config, name=None, train=False
-    )
-
-    # Test with both name and train
-    table.create_index(vector_column_name="vector", name="my_index_name", train=True)
-    mock_create_index.assert_called_with(
-        "vector", replace=True, config=expected_config, name="my_index_name", train=True
+        "my_vector", replace=True, config=expected_config
     )
 
 
@@ -1274,13 +1235,11 @@ def test_create_scalar_index(mem_db: DBConnection):
         "my_table",
         data=test_data,
     )
-    # Test with default name
    table.create_scalar_index("x")
    indices = table.list_indices()
    assert len(indices) == 1
    scalar_index = indices[0]
    assert scalar_index.index_type == "BTree"
-    assert scalar_index.name == "x_idx"  # Default name
 
    # Confirm that prefiltering still works with the scalar index column
    results = table.search().where("x = 'c'").to_arrow()
@@ -1294,14 +1253,6 @@ def test_create_scalar_index(mem_db: DBConnection):
     indices = table.list_indices()
     assert len(indices) == 0
 
-    # Test with custom name
-    table.create_scalar_index("y", name="custom_y_index")
-    indices = table.list_indices()
-    assert len(indices) == 1
-    scalar_index = indices[0]
-    assert scalar_index.index_type == "BTree"
-    assert scalar_index.name == "custom_y_index"
-
 
 def test_empty_query(mem_db: DBConnection):
     table = mem_db.create_table(
@@ -1,26 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright The LanceDB Authors
-
-import pyarrow as pa
-import pytest
-
-torch = pytest.importorskip("torch")
-
-
-def tbl_to_tensor(tbl):
-    def to_tensor(col: pa.ChunkedArray):
-        if col.num_chunks > 1:
-            raise Exception("Single batch was too large to fit into a one-chunk table")
-        return torch.from_dlpack(col.chunk(0))
-
-    return torch.stack([to_tensor(tbl.column(i)) for i in range(tbl.num_columns)])
-
-
-def test_table_dataloader(mem_db):
-    table = mem_db.create_table("test_table", pa.table({"a": range(1000)}))
-    dataloader = torch.utils.data.DataLoader(
-        table, collate_fn=tbl_to_tensor, batch_size=10, shuffle=True
-    )
-    for batch in dataloader:
-        assert batch.size(0) == 1
-        assert batch.size(1) == 10
@@ -63,16 +63,14 @@ impl Connection {
         self.get_inner().map(|inner| inner.uri().to_string())
     }
 
-    #[pyo3(signature = (namespace=vec![], start_after=None, limit=None))]
+    #[pyo3(signature = (start_after=None, limit=None))]
     pub fn table_names(
         self_: PyRef<'_, Self>,
-        namespace: Vec<String>,
         start_after: Option<String>,
         limit: Option<u32>,
     ) -> PyResult<Bound<'_, PyAny>> {
         let inner = self_.get_inner()?.clone();
         let mut op = inner.table_names();
-        op = op.namespace(namespace);
         if let Some(start_after) = start_after {
             op = op.start_after(start_after);
         }
@@ -82,13 +80,12 @@ impl Connection {
         future_into_py(self_.py(), async move { op.execute().await.infer_error() })
     }
 
-    #[pyo3(signature = (name, mode, data, namespace=vec![], storage_options=None))]
+    #[pyo3(signature = (name, mode, data, storage_options=None))]
     pub fn create_table<'a>(
         self_: PyRef<'a, Self>,
         name: String,
         mode: &str,
         data: Bound<'_, PyAny>,
-        namespace: Vec<String>,
         storage_options: Option<HashMap<String, String>>,
     ) -> PyResult<Bound<'a, PyAny>> {
         let inner = self_.get_inner()?.clone();
@@ -96,10 +93,8 @@ impl Connection {
         let mode = Self::parse_create_mode_str(mode)?;
 
         let batches = ArrowArrayStreamReader::from_pyarrow_bound(&data)?;
 
         let mut builder = inner.create_table(name, batches).mode(mode);
-
-        builder = builder.namespace(namespace);
         if let Some(storage_options) = storage_options {
             builder = builder.storage_options(storage_options);
         }
@@ -110,13 +105,12 @@ impl Connection {
         })
     }
 
-    #[pyo3(signature = (name, mode, schema, namespace=vec![], storage_options=None))]
+    #[pyo3(signature = (name, mode, schema, storage_options=None))]
     pub fn create_empty_table<'a>(
         self_: PyRef<'a, Self>,
         name: String,
         mode: &str,
         schema: Bound<'_, PyAny>,
-        namespace: Vec<String>,
         storage_options: Option<HashMap<String, String>>,
     ) -> PyResult<Bound<'a, PyAny>> {
         let inner = self_.get_inner()?.clone();
@@ -127,7 +121,6 @@ impl Connection {
 
         let mut builder = inner.create_empty_table(name, Arc::new(schema)).mode(mode);
 
-        builder = builder.namespace(namespace);
         if let Some(storage_options) = storage_options {
             builder = builder.storage_options(storage_options);
         }
@@ -138,115 +131,49 @@ impl Connection {
         })
     }
 
-    #[pyo3(signature = (name, namespace=vec![], storage_options = None, index_cache_size = None))]
+    #[pyo3(signature = (name, storage_options = None, index_cache_size = None))]
     pub fn open_table(
         self_: PyRef<'_, Self>,
         name: String,
-        namespace: Vec<String>,
         storage_options: Option<HashMap<String, String>>,
         index_cache_size: Option<u32>,
     ) -> PyResult<Bound<'_, PyAny>> {
         let inner = self_.get_inner()?.clone();
 
         let mut builder = inner.open_table(name);
-        builder = builder.namespace(namespace);
         if let Some(storage_options) = storage_options {
             builder = builder.storage_options(storage_options);
         }
         if let Some(index_cache_size) = index_cache_size {
             builder = builder.index_cache_size(index_cache_size);
         }
 
         future_into_py(self_.py(), async move {
             let table = builder.execute().await.infer_error()?;
             Ok(Table::new(table))
         })
     }
 
-    #[pyo3(signature = (cur_name, new_name, cur_namespace=vec![], new_namespace=vec![]))]
     pub fn rename_table(
         self_: PyRef<'_, Self>,
-        cur_name: String,
+        old_name: String,
         new_name: String,
-        cur_namespace: Vec<String>,
-        new_namespace: Vec<String>,
     ) -> PyResult<Bound<'_, PyAny>> {
         let inner = self_.get_inner()?.clone();
         future_into_py(self_.py(), async move {
-            inner
-                .rename_table(cur_name, new_name, &cur_namespace, &new_namespace)
-                .await
-                .infer_error()
+            inner.rename_table(old_name, new_name).await.infer_error()
         })
     }
 
-    #[pyo3(signature = (name, namespace=vec![]))]
-    pub fn drop_table(
-        self_: PyRef<'_, Self>,
-        name: String,
-        namespace: Vec<String>,
-    ) -> PyResult<Bound<'_, PyAny>> {
+    pub fn drop_table(self_: PyRef<'_, Self>, name: String) -> PyResult<Bound<'_, PyAny>> {
         let inner = self_.get_inner()?.clone();
         future_into_py(self_.py(), async move {
-            inner.drop_table(name, &namespace).await.infer_error()
+            inner.drop_table(name).await.infer_error()
         })
     }
 
-    #[pyo3(signature = (namespace=vec![],))]
-    pub fn drop_all_tables(
-        self_: PyRef<'_, Self>,
-        namespace: Vec<String>,
-    ) -> PyResult<Bound<'_, PyAny>> {
+    pub fn drop_all_tables(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
         let inner = self_.get_inner()?.clone();
         future_into_py(self_.py(), async move {
-            inner.drop_all_tables(&namespace).await.infer_error()
-        })
-    }
-
-    // Namespace management methods
-
-    #[pyo3(signature = (namespace=vec![], page_token=None, limit=None))]
-    pub fn list_namespaces(
-        self_: PyRef<'_, Self>,
-        namespace: Vec<String>,
-        page_token: Option<String>,
-        limit: Option<u32>,
-    ) -> PyResult<Bound<'_, PyAny>> {
-        let inner = self_.get_inner()?.clone();
-        future_into_py(self_.py(), async move {
-            use lancedb::database::ListNamespacesRequest;
-            let request = ListNamespacesRequest {
-                namespace,
-                page_token,
-                limit,
-            };
-            inner.list_namespaces(request).await.infer_error()
-        })
-    }
-
-    #[pyo3(signature = (namespace,))]
-    pub fn create_namespace(
-        self_: PyRef<'_, Self>,
-        namespace: Vec<String>,
-    ) -> PyResult<Bound<'_, PyAny>> {
-        let inner = self_.get_inner()?.clone();
-        future_into_py(self_.py(), async move {
-            use lancedb::database::CreateNamespaceRequest;
-            let request = CreateNamespaceRequest { namespace };
-            inner.create_namespace(request).await.infer_error()
-        })
-    }
-
-    #[pyo3(signature = (namespace,))]
-    pub fn drop_namespace(
-        self_: PyRef<'_, Self>,
-        namespace: Vec<String>,
-    ) -> PyResult<Bound<'_, PyAny>> {
-        let inner = self_.get_inner()?.clone();
-        future_into_py(self_.py(), async move {
-            use lancedb::database::DropNamespaceRequest;
-            let request = DropNamespaceRequest { namespace };
-            inner.drop_namespace(request).await.infer_error()
+            inner.drop_all_tables().await.infer_error()
         })
     }
 }
@@ -300,8 +227,6 @@ pub struct PyClientConfig {
     retry_config: Option<PyClientRetryConfig>,
     timeout_config: Option<PyClientTimeoutConfig>,
     extra_headers: Option<HashMap<String, String>>,
-    id_delimiter: Option<String>,
-    tls_config: Option<PyClientTlsConfig>,
 }
 
 #[derive(FromPyObject)]
@@ -322,14 +247,6 @@ pub struct PyClientTimeoutConfig {
     pool_idle_timeout: Option<Duration>,
 }
 
-#[derive(FromPyObject)]
-pub struct PyClientTlsConfig {
-    cert_file: Option<String>,
-    key_file: Option<String>,
-    ssl_ca_cert: Option<String>,
-    assert_hostname: bool,
-}
-
 #[cfg(feature = "remote")]
 impl From<PyClientRetryConfig> for lancedb::remote::RetryConfig {
     fn from(value: PyClientRetryConfig) -> Self {
@@ -356,18 +273,6 @@ impl From<PyClientTimeoutConfig> for lancedb::remote::TimeoutConfig {
     }
 }
 
-#[cfg(feature = "remote")]
-impl From<PyClientTlsConfig> for lancedb::remote::TlsConfig {
-    fn from(value: PyClientTlsConfig) -> Self {
-        Self {
-            cert_file: value.cert_file,
-            key_file: value.key_file,
-            ssl_ca_cert: value.ssl_ca_cert,
-            assert_hostname: value.assert_hostname,
-        }
-    }
-}
-
 #[cfg(feature = "remote")]
 impl From<PyClientConfig> for lancedb::remote::ClientConfig {
     fn from(value: PyClientConfig) -> Self {
@@ -376,8 +281,6 @@ impl From<PyClientConfig> for lancedb::remote::ClientConfig {
             retry_config: value.retry_config.map(Into::into).unwrap_or_default(),
             timeout_config: value.timeout_config.map(Into::into).unwrap_or_default(),
             extra_headers: value.extra_headers.unwrap_or_default(),
-            id_delimiter: value.id_delimiter,
-            tls_config: value.tls_config.map(Into::into),
         }
     }
 }
@@ -341,15 +341,13 @@ impl Table {
         })
     }
 
-    #[pyo3(signature = (column, index=None, replace=None, wait_timeout=None, *, name=None, train=None))]
+    #[pyo3(signature = (column, index=None, replace=None, wait_timeout=None))]
     pub fn create_index<'a>(
         self_: PyRef<'a, Self>,
         column: String,
         index: Option<Bound<'_, PyAny>>,
         replace: Option<bool>,
         wait_timeout: Option<Bound<'_, PyAny>>,
-        name: Option<String>,
-        train: Option<bool>,
     ) -> PyResult<Bound<'a, PyAny>> {
         let index = extract_index_params(&index)?;
         let timeout = wait_timeout.map(|t| t.extract::<std::time::Duration>().unwrap());
@@ -359,12 +357,6 @@ impl Table {
         if let Some(replace) = replace {
             op = op.replace(replace);
         }
-        if let Some(name) = name {
-            op = op.name(name);
-        }
-        if let Some(train) = train {
-            op = op.train(train);
-        }
 
         future_into_py(self_.py(), async move {
             op.execute().await.infer_error()?;
@@ -1,6 +1,6 @@
 [package]
 name = "lancedb"
-version = "0.22.1-beta.0"
+version = "0.21.3"
 edition.workspace = true
 description = "LanceDB: A serverless, low-latency vector database for AI applications"
 license.workspace = true
@@ -97,12 +97,7 @@ rstest = "0.23.0"
 
 
 [features]
-default = ["aws", "gcs", "azure", "dynamodb", "oss"]
-aws = ["lance/aws", "lance-io/aws"]
-oss = ["lance/oss", "lance-io/oss"]
-gcs = ["lance/gcp", "lance-io/gcp"]
-azure = ["lance/azure", "lance-io/azure"]
-dynamodb = ["lance/dynamodb", "aws"]
+default = []
 remote = ["dep:reqwest", "dep:http", "dep:rand", "dep:uuid"]
 fp16kernels = ["lance-linalg/fp16kernels"]
 s3-test = []
@@ -62,8 +62,10 @@ async fn main() -> Result<()> {
             .as_any()
             .downcast_ref::<StringArray>()
             .unwrap();
-        for text in out.iter().flatten() {
-            println!("Result: {}", text);
+        for text in out.iter() {
+            if let Some(text) = text {
+                println!("Result: {}", text);
+            }
         }
     }
 
@@ -43,7 +43,7 @@ async fn main() -> Result<()> {
     // --8<-- [end:delete]
 
     // --8<-- [start:drop_table]
-    db.drop_table("my_table", &[]).await.unwrap();
+    db.drop_table("my_table").await.unwrap();
     // --8<-- [end:drop_table]
     Ok(())
 }
@@ -379,7 +379,6 @@ mod tests {
             data: CreateTableData::Empty(TableDefinition::new_from_schema(dummy_schema)),
             mode: Default::default(),
             write_options: Default::default(),
-            namespace: vec![],
         })
         .await
         .unwrap();
@@ -415,7 +414,6 @@ mod tests {
             data: CreateTableData::Empty(TableDefinition::new_from_schema(dummy_schema)),
             mode: Default::default(),
             write_options: Default::default(),
-            namespace: vec![],
         })
         .await
         .unwrap();
@@ -9,7 +9,6 @@ use std::sync::Arc;
 use arrow_array::RecordBatchReader;
 use arrow_schema::{Field, SchemaRef};
 use lance::dataset::ReadParams;
-#[cfg(feature = "aws")]
 use object_store::aws::AwsCredential;
 
 use crate::arrow::{IntoArrow, IntoArrowStream, SendableRecordBatchStream};
@@ -19,9 +18,8 @@ use crate::database::listing::{
     ListingDatabase, OPT_NEW_TABLE_STORAGE_VERSION, OPT_NEW_TABLE_V2_MANIFEST_PATHS,
 };
 use crate::database::{
-    CreateNamespaceRequest, CreateTableData, CreateTableMode, CreateTableRequest, Database,
-    DatabaseOptions, DropNamespaceRequest, ListNamespacesRequest, OpenTableRequest,
-    TableNamesRequest,
+    CreateTableData, CreateTableMode, CreateTableRequest, Database, DatabaseOptions,
+    OpenTableRequest, TableNamesRequest,
 };
 use crate::embeddings::{
     EmbeddingDefinition, EmbeddingFunction, EmbeddingRegistry, MemoryRegistry, WithEmbeddings,
@@ -68,12 +66,6 @@ impl TableNamesBuilder {
         self
     }
 
-    /// Set the namespace to list tables from
-    pub fn namespace(mut self, namespace: Vec<String>) -> Self {
-        self.request.namespace = namespace;
-        self
-    }
-
     /// Execute the table names operation
     pub async fn execute(self) -> Result<Vec<String>> {
         self.parent.clone().table_names(self.request).await
@@ -355,12 +347,6 @@ impl<const HAS_DATA: bool> CreateTableBuilder<HAS_DATA> {
         );
         self
     }
-
-    /// Set the namespace for the table
-    pub fn namespace(mut self, namespace: Vec<String>) -> Self {
-        self.request.namespace = namespace;
-        self
-    }
 }
 
 #[derive(Clone, Debug)]
@@ -380,7 +366,6 @@ impl OpenTableBuilder {
             parent,
             request: OpenTableRequest {
                 name,
-                namespace: vec![],
                 index_cache_size: None,
                 lance_read_params: None,
             },
@@ -456,12 +441,6 @@ impl OpenTableBuilder {
         self
     }
 
-    /// Set the namespace for the table
-    pub fn namespace(mut self, namespace: Vec<String>) -> Self {
-        self.request.namespace = namespace;
-        self
-    }
-
     /// Open the table
     pub async fn execute(self) -> Result<Table> {
         Ok(Table::new_with_embedding_registry(
@@ -584,16 +563,9 @@ impl Connection {
         &self,
         old_name: impl AsRef<str>,
         new_name: impl AsRef<str>,
-        cur_namespace: &[String],
-        new_namespace: &[String],
     ) -> Result<()> {
         self.internal
-            .rename_table(
-                old_name.as_ref(),
-                new_name.as_ref(),
-                cur_namespace,
-                new_namespace,
-            )
+            .rename_table(old_name.as_ref(), new_name.as_ref())
             .await
     }
 
@@ -601,9 +573,8 @@ impl Connection {
     ///
     /// # Arguments
     /// * `name` - The name of the table to drop
-    /// * `namespace` - The namespace to drop the table from
-    pub async fn drop_table(&self, name: impl AsRef<str>, namespace: &[String]) -> Result<()> {
-        self.internal.drop_table(name.as_ref(), namespace).await
+    pub async fn drop_table(&self, name: impl AsRef<str>) -> Result<()> {
+        self.internal.drop_table(name.as_ref()).await
     }
 
     /// Drop the database
@@ -611,30 +582,12 @@ impl Connection {
     /// This is the same as dropping all of the tables
     #[deprecated(since = "0.15.1", note = "Use `drop_all_tables` instead")]
     pub async fn drop_db(&self) -> Result<()> {
-        self.internal.drop_all_tables(&[]).await
+        self.internal.drop_all_tables().await
     }
 
     /// Drops all tables in the database
-    ///
-    /// # Arguments
-    /// * `namespace` - The namespace to drop all tables from. Empty slice represents root namespace.
-    pub async fn drop_all_tables(&self, namespace: &[String]) -> Result<()> {
-        self.internal.drop_all_tables(namespace).await
-    }
-
-    /// List immediate child namespace names in the given namespace
-    pub async fn list_namespaces(&self, request: ListNamespacesRequest) -> Result<Vec<String>> {
-        self.internal.list_namespaces(request).await
-    }
-
-    /// Create a new namespace
-    pub async fn create_namespace(&self, request: CreateNamespaceRequest) -> Result<()> {
-        self.internal.create_namespace(request).await
-    }
-
-    /// Drop a namespace
-    pub async fn drop_namespace(&self, request: DropNamespaceRequest) -> Result<()> {
-        self.internal.drop_namespace(request).await
+    pub async fn drop_all_tables(&self) -> Result<()> {
+        self.internal.drop_all_tables().await
     }
 
     /// Get the in-memory embedding registry.
@@ -796,7 +749,6 @@ impl ConnectBuilder {
     }
 
     /// [`AwsCredential`] to use when connecting to S3.
-    #[cfg(feature = "aws")]
     #[deprecated(note = "Pass through storage_options instead")]
     pub fn aws_creds(mut self, aws_creds: AwsCredential) -> Self {
         self.request
@@ -1266,12 +1218,12 @@ mod tests {
 
         // drop non-exist table
         assert!(matches!(
-            db.drop_table("invalid_table", &[]).await,
+            db.drop_table("invalid_table").await,
             Err(crate::Error::TableNotFound { .. }),
         ));
 
         create_dir_all(tmp_dir.path().join("table1.lance")).unwrap();
-        db.drop_table("table1", &[]).await.unwrap();
+        db.drop_table("table1").await.unwrap();
 
         let tables = db.table_names().execute().await.unwrap();
         assert_eq!(tables.len(), 0);
@@ -34,36 +34,9 @@ pub trait DatabaseOptions {
     fn serialize_into_map(&self, map: &mut HashMap<String, String>);
 }
 
-/// A request to list namespaces in the database
-#[derive(Clone, Debug, Default)]
-pub struct ListNamespacesRequest {
-    /// The parent namespace to list namespaces in. Empty list represents root namespace.
-    pub namespace: Vec<String>,
-    /// If present, only return names that come lexicographically after the supplied value.
-    pub page_token: Option<String>,
-    /// The maximum number of namespace names to return
-    pub limit: Option<u32>,
-}
-
-/// A request to create a namespace
-#[derive(Clone, Debug)]
-pub struct CreateNamespaceRequest {
-    /// The namespace identifier to create
-    pub namespace: Vec<String>,
-}
-
-/// A request to drop a namespace
-#[derive(Clone, Debug)]
-pub struct DropNamespaceRequest {
-    /// The namespace identifier to drop
-    pub namespace: Vec<String>,
-}
-
 /// A request to list names of tables in the database
 #[derive(Clone, Debug, Default)]
 pub struct TableNamesRequest {
-    /// The namespace to list tables in. Empty list represents root namespace.
-    pub namespace: Vec<String>,
     /// If present, only return names that come lexicographically after the supplied
     /// value.
     ///
@@ -78,8 +51,6 @@ pub struct TableNamesRequest {
 #[derive(Clone, Debug)]
 pub struct OpenTableRequest {
     pub name: String,
-    /// The namespace to open the table from. Empty list represents root namespace.
-    pub namespace: Vec<String>,
     pub index_cache_size: Option<u32>,
     pub lance_read_params: Option<ReadParams>,
 }
@@ -154,8 +125,6 @@ impl StreamingWriteSource for CreateTableData {
 pub struct CreateTableRequest {
     /// The name of the new table
     pub name: String,
-    /// The namespace to create the table in. Empty list represents root namespace.
-    pub namespace: Vec<String>,
     /// Initial data to write to the table, can be None to create an empty table
     pub data: CreateTableData,
     /// The mode to use when creating the table
@@ -168,7 +137,6 @@ impl CreateTableRequest {
     pub fn new(name: String, data: CreateTableData) -> Self {
         Self {
             name,
-            namespace: vec![],
             data,
             mode: CreateTableMode::default(),
             write_options: WriteOptions::default(),
@@ -183,12 +151,6 @@ impl CreateTableRequest {
 pub trait Database:
     Send + Sync + std::any::Any + std::fmt::Debug + std::fmt::Display + 'static
 {
-    /// List immediate child namespace names in the given namespace
-    async fn list_namespaces(&self, request: ListNamespacesRequest) -> Result<Vec<String>>;
-    /// Create a new namespace
-    async fn create_namespace(&self, request: CreateNamespaceRequest) -> Result<()>;
-    /// Drop a namespace
-    async fn drop_namespace(&self, request: DropNamespaceRequest) -> Result<()>;
     /// List the names of tables in the database
     async fn table_names(&self, request: TableNamesRequest) -> Result<Vec<String>>;
     /// Create a table in the database
@@ -196,16 +158,10 @@ pub trait Database:
     /// Open a table in the database
     async fn open_table(&self, request: OpenTableRequest) -> Result<Arc<dyn BaseTable>>;
     /// Rename a table in the database
-    async fn rename_table(
-        &self,
-        cur_name: &str,
-        new_name: &str,
-        cur_namespace: &[String],
-        new_namespace: &[String],
-    ) -> Result<()>;
+    async fn rename_table(&self, old_name: &str, new_name: &str) -> Result<()>;
     /// Drop a table in the database
-    async fn drop_table(&self, name: &str, namespace: &[String]) -> Result<()>;
+    async fn drop_table(&self, name: &str) -> Result<()>;
     /// Drop all tables in the database
-    async fn drop_all_tables(&self, namespace: &[String]) -> Result<()>;
+    async fn drop_all_tables(&self) -> Result<()>;
     fn as_any(&self) -> &dyn std::any::Any;
 }
@@ -22,8 +22,7 @@ use crate::table::NativeTable;
 use crate::utils::validate_table_name;
 
 use super::{
-    BaseTable, CreateNamespaceRequest, CreateTableMode, CreateTableRequest, Database,
-    DatabaseOptions, DropNamespaceRequest, ListNamespacesRequest, OpenTableRequest,
+    BaseTable, CreateTableMode, CreateTableRequest, Database, DatabaseOptions, OpenTableRequest,
     TableNamesRequest,
 };
 
@@ -552,7 +551,6 @@ impl ListingDatabase {
     async fn handle_table_exists(
         &self,
         table_name: &str,
-        namespace: Vec<String>,
         mode: CreateTableMode,
         data_schema: &arrow_schema::Schema,
     ) -> Result<Arc<dyn BaseTable>> {
@@ -563,7 +561,6 @@ impl ListingDatabase {
             CreateTableMode::ExistOk(callback) => {
                 let req = OpenTableRequest {
                     name: table_name.to_string(),
-                    namespace: namespace.clone(),
                     index_cache_size: None,
                     lance_read_params: None,
                 };
@@ -587,28 +584,7 @@ impl ListingDatabase {
 
 #[async_trait::async_trait]
 impl Database for ListingDatabase {
-    async fn list_namespaces(&self, _request: ListNamespacesRequest) -> Result<Vec<String>> {
-        Ok(Vec::new())
-    }
-
-    async fn create_namespace(&self, _request: CreateNamespaceRequest) -> Result<()> {
-        Err(Error::NotSupported {
-            message: "Namespace operations are not supported for listing database".into(),
-        })
-    }
-
-    async fn drop_namespace(&self, _request: DropNamespaceRequest) -> Result<()> {
-        Err(Error::NotSupported {
-            message: "Namespace operations are not supported for listing database".into(),
-        })
-    }
-
     async fn table_names(&self, request: TableNamesRequest) -> Result<Vec<String>> {
-        if !request.namespace.is_empty() {
-            return Err(Error::NotSupported {
-                message: "Namespace parameter is not supported for listing database. Only root namespace is supported.".into(),
-            });
-        }
         let mut f = self
             .object_store
             .read_dir(self.base_path.clone())
@@ -639,11 +615,6 @@ impl Database for ListingDatabase {
     }
 
     async fn create_table(&self, request: CreateTableRequest) -> Result<Arc<dyn BaseTable>> {
-        if !request.namespace.is_empty() {
-            return Err(Error::NotSupported {
-                message: "Namespace parameter is not supported for listing database. Only root namespace is supported.".into(),
-            });
-        }
         let table_uri = self.table_uri(&request.name)?;
 
         let (storage_version_override, v2_manifest_override) =
@@ -666,24 +637,14 @@ impl Database for ListingDatabase {
         {
             Ok(table) => Ok(Arc::new(table)),
             Err(Error::TableAlreadyExists { .. }) => {
-                self.handle_table_exists(
-                    &request.name,
-                    request.namespace.clone(),
-                    request.mode,
-                    &data_schema,
-                )
-                .await
+                self.handle_table_exists(&request.name, request.mode, &data_schema)
+                    .await
             }
             Err(err) => Err(err),
         }
     }
 
     async fn open_table(&self, mut request: OpenTableRequest) -> Result<Arc<dyn BaseTable>> {
-        if !request.namespace.is_empty() {
-            return Err(Error::NotSupported {
-                message: "Namespace parameter is not supported for listing database. Only root namespace is supported.".into(),
-            });
-        }
         let table_uri = self.table_uri(&request.name)?;
 
         // Only modify the storage options if we actually have something to
@@ -733,44 +694,17 @@ impl Database for ListingDatabase {
         Ok(native_table)
     }
 
-    async fn rename_table(
-        &self,
-        _cur_name: &str,
-        _new_name: &str,
-        cur_namespace: &[String],
-        new_namespace: &[String],
-    ) -> Result<()> {
-        if !cur_namespace.is_empty() {
-            return Err(Error::NotSupported {
-                message: "Namespace parameter is not supported for listing database.".into(),
-            });
-        }
-        if !new_namespace.is_empty() {
-            return Err(Error::NotSupported {
-                message: "Namespace parameter is not supported for listing database.".into(),
-            });
-        }
+    async fn rename_table(&self, _old_name: &str, _new_name: &str) -> Result<()> {
         Err(Error::NotSupported {
-            message: "rename_table is not supported in LanceDB OSS".into(),
+            message: "rename_table is not supported in LanceDB OSS".to_string(),
         })
     }
 
-    async fn drop_table(&self, name: &str, namespace: &[String]) -> Result<()> {
-        if !namespace.is_empty() {
-            return Err(Error::NotSupported {
-                message: "Namespace parameter is not supported for listing database.".into(),
-            });
-        }
+    async fn drop_table(&self, name: &str) -> Result<()> {
         self.drop_tables(vec![name.to_string()]).await
     }
 
-    async fn drop_all_tables(&self, namespace: &[String]) -> Result<()> {
-        // Check if namespace parameter is provided
-        if !namespace.is_empty() {
-            return Err(Error::NotSupported {
-                message: "Namespace parameter is not supported for listing database.".into(),
-            });
-        }
+    async fn drop_all_tables(&self) -> Result<()> {
         let tables = self.table_names(TableNamesRequest::default()).await?;
         self.drop_tables(tables).await
     }
@@ -65,94 +65,12 @@ pub enum Index {
 /// Builder for the create_index operation
 ///
 /// The methods on this builder are used to specify options common to all indices.
-///
-/// # Examples
-///
-/// Creating a basic vector index:
-///
-/// ```
-/// use lancedb::{connect, index::{Index, vector::IvfPqIndexBuilder}};
-///
-/// # async fn create_basic_vector_index() -> lancedb::Result<()> {
-/// let db = connect("data/sample-lancedb").execute().await?;
-/// let table = db.open_table("my_table").execute().await?;
-///
-/// // Create a vector index with default settings
-/// table
-///     .create_index(&["vector"], Index::IvfPq(IvfPqIndexBuilder::default()))
-///     .execute()
-///     .await?;
-/// # Ok(())
-/// # }
-/// ```
-///
-/// Creating an index with a custom name:
-///
-/// ```
-/// use lancedb::{connect, index::{Index, vector::IvfPqIndexBuilder}};
-///
-/// # async fn create_named_index() -> lancedb::Result<()> {
-/// let db = connect("data/sample-lancedb").execute().await?;
-/// let table = db.open_table("my_table").execute().await?;
-///
-/// // Create a vector index with a custom name
-/// table
-///     .create_index(&["embeddings"], Index::IvfPq(IvfPqIndexBuilder::default()))
-///     .name("my_embeddings_index".to_string())
-///     .execute()
-///     .await?;
-/// # Ok(())
-/// # }
-/// ```
-///
-/// Creating an untrained index (for scalar indices only):
-///
-/// ```
-/// use lancedb::{connect, index::{Index, scalar::BTreeIndexBuilder}};
-///
-/// # async fn create_untrained_index() -> lancedb::Result<()> {
-/// let db = connect("data/sample-lancedb").execute().await?;
-/// let table = db.open_table("my_table").execute().await?;
-///
-/// // Create a BTree index without training (creates empty index)
-/// table
-///     .create_index(&["category"], Index::BTree(BTreeIndexBuilder::default()))
-///     .train(false)
-///     .name("category_index".to_string())
-///     .execute()
-///     .await?;
-/// # Ok(())
-/// # }
-/// ```
-///
-/// Creating a scalar index with all options:
-///
-/// ```
-/// use lancedb::{connect, index::{Index, scalar::BitmapIndexBuilder}};
-///
-/// # async fn create_full_options_index() -> lancedb::Result<()> {
-/// let db = connect("data/sample-lancedb").execute().await?;
-/// let table = db.open_table("my_table").execute().await?;
-///
-/// // Create a bitmap index with full configuration
-/// table
-///     .create_index(&["status"], Index::Bitmap(BitmapIndexBuilder::default()))
-///     .name("status_bitmap_index".to_string())
-///     .train(true) // Train the index with existing data
-///     .replace(false) // Don't replace if index already exists
-///     .execute()
-///     .await?;
-/// # Ok(())
-/// # }
-/// ```
 pub struct IndexBuilder {
     parent: Arc<dyn BaseTable>,
     pub(crate) index: Index,
     pub(crate) columns: Vec<String>,
     pub(crate) replace: bool,
     pub(crate) wait_timeout: Option<Duration>,
-    pub(crate) train: bool,
-    pub(crate) name: Option<String>,
 }
 
 impl IndexBuilder {
@@ -162,9 +80,7 @@ impl IndexBuilder {
             index,
             columns,
             replace: true,
-            train: true,
             wait_timeout: None,
-            name: None,
         }
     }
 
@@ -178,82 +94,6 @@ impl IndexBuilder {
         self
     }
 
-    /// The name of the index. If not set, a default name will be generated.
-    ///
-    /// # Examples
-    ///
-    /// ```
-    /// use lancedb::{connect, index::{Index, scalar::BTreeIndexBuilder}};
-    ///
-    /// # async fn name_example() -> lancedb::Result<()> {
-    /// let db = connect("data/sample-lancedb").execute().await?;
-    /// let table = db.open_table("my_table").execute().await?;
-    ///
-    /// // Create an index with a custom name
-    /// table
-    ///     .create_index(&["user_id"], Index::BTree(BTreeIndexBuilder::default()))
-    ///     .name("user_id_btree_index".to_string())
-    ///     .execute()
-    ///     .await?;
-    /// # Ok(())
-    /// # }
-    /// ```
-    pub fn name(mut self, v: String) -> Self {
-        self.name = Some(v);
-        self
-    }
-
-    /// Whether to train the index, the default is `true`.
-    ///
-    /// If this is false, the index will not be trained and just created empty.
-    ///
-    /// This is not supported for vector indices yet.
-    ///
-    /// # Examples
-    ///
-    /// Creating an empty index that will be populated later:
-    ///
-    /// ```
-    /// use lancedb::{connect, index::{Index, scalar::BitmapIndexBuilder}};
-    ///
-    /// # async fn train_false_example() -> lancedb::Result<()> {
-    /// let db = connect("data/sample-lancedb").execute().await?;
-    /// let table = db.open_table("my_table").execute().await?;
-    ///
-    /// // Create an empty bitmap index (not trained with existing data)
-    /// table
-    ///     .create_index(&["category"], Index::Bitmap(BitmapIndexBuilder::default()))
-    ///     .train(false) // Create empty index
-    ///     .name("category_bitmap".to_string())
-    ///     .execute()
-    ///     .await?;
-    /// # Ok(())
-    /// # }
-    /// ```
-    ///
-    /// Creating a trained index (default behavior):
-    ///
-    /// ```
-    /// use lancedb::{connect, index::{Index, scalar::BTreeIndexBuilder}};
-    ///
-    /// # async fn train_true_example() -> lancedb::Result<()> {
-    /// let db = connect("data/sample-lancedb").execute().await?;
-    /// let table = db.open_table("my_table").execute().await?;
-    ///
-    /// // Create a trained BTree index (includes existing data)
-    /// table
-    ///     .create_index(&["timestamp"], Index::BTree(BTreeIndexBuilder::default()))
-    ///     .train(true) // Train with existing data (this is the default)
-    ///     .execute()
-    ///     .await?;
-    /// # Ok(())
-    /// # }
-    /// ```
-    pub fn train(mut self, v: bool) -> Self {
-        self.train = v;
-        self
-    }
-
     /// Duration of time to wait for asynchronous indexing to complete. If not set,
     /// `create_index()` will not wait.
     ///
@@ -9,7 +9,7 @@ use futures::{stream::BoxStream, TryFutureExt};
 use lance::io::WrappingObjectStore;
 use object_store::{
     path::Path, Error, GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, ObjectStore,
-    PutMultipartOptions, PutOptions, PutPayload, PutResult, Result, UploadPart,
+    PutMultipartOpts, PutOptions, PutPayload, PutResult, Result, UploadPart,
 };
 
 use async_trait::async_trait;
@@ -73,7 +73,7 @@ impl ObjectStore for MirroringObjectStore {
     async fn put_multipart_opts(
         &self,
         location: &Path,
-        opts: PutMultipartOptions,
+        opts: PutMultipartOpts,
     ) -> Result<Box<dyn MultipartUpload>> {
         if location.primary_only() {
             return self.primary.put_multipart_opts(location, opts).await;
@@ -170,11 +170,7 @@ impl MirroringObjectStoreWrapper {
 }
 
 impl WrappingObjectStore for MirroringObjectStoreWrapper {
-    fn wrap(
-        &self,
-        primary: Arc<dyn ObjectStore>,
-        _storage_options: Option<&std::collections::HashMap<String, String>>,
-    ) -> Arc<dyn ObjectStore> {
+    fn wrap(&self, primary: Arc<dyn ObjectStore>) -> Arc<dyn ObjectStore> {
         Arc::new(MirroringObjectStore {
             primary,
             secondary: self.secondary.clone(),
@@ -11,7 +11,7 @@ use futures::stream::BoxStream;
|
|||||||
use lance::io::WrappingObjectStore;
|
use lance::io::WrappingObjectStore;
|
||||||
use object_store::{
|
use object_store::{
|
||||||
path::Path, GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, ObjectStore,
|
path::Path, GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, ObjectStore,
|
||||||
PutMultipartOptions, PutOptions, PutPayload, PutResult, Result as OSResult, UploadPart,
|
PutMultipartOpts, PutOptions, PutPayload, PutResult, Result as OSResult, UploadPart,
|
||||||
};
|
};
|
||||||
|
|
||||||
#[derive(Debug, Default)]
|
#[derive(Debug, Default)]
|
||||||
@@ -50,11 +50,7 @@ impl IoStatsHolder {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl WrappingObjectStore for IoStatsHolder {
|
impl WrappingObjectStore for IoStatsHolder {
|
||||||
fn wrap(
|
fn wrap(&self, target: Arc<dyn ObjectStore>) -> Arc<dyn ObjectStore> {
|
||||||
&self,
|
|
||||||
target: Arc<dyn ObjectStore>,
|
|
||||||
_storage_options: Option<&std::collections::HashMap<String, String>>,
|
|
||||||
) -> Arc<dyn ObjectStore> {
|
|
||||||
Arc::new(IoTrackingStore {
|
Arc::new(IoTrackingStore {
|
||||||
target,
|
target,
|
||||||
stats: self.0.clone(),
|
stats: self.0.clone(),
|
||||||
@@ -110,7 +106,7 @@ impl ObjectStore for IoTrackingStore {
|
|||||||
async fn put_multipart_opts(
|
async fn put_multipart_opts(
|
||||||
&self,
|
&self,
|
||||||
location: &Path,
|
location: &Path,
|
||||||
opts: PutMultipartOptions,
|
opts: PutMultipartOpts,
|
||||||
) -> OSResult<Box<dyn MultipartUpload>> {
|
) -> OSResult<Box<dyn MultipartUpload>> {
|
||||||
let target = self.target.put_multipart_opts(location, opts).await?;
|
let target = self.target.put_multipart_opts(location, opts).await?;
|
||||||
Ok(Box::new(IoTrackingMultipartUpload {
|
Ok(Box::new(IoTrackingMultipartUpload {
|
||||||
|
|||||||
@@ -18,5 +18,5 @@ const ARROW_FILE_CONTENT_TYPE: &str = "application/vnd.apache.arrow.file";
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
const JSON_CONTENT_TYPE: &str = "application/json";
|
const JSON_CONTENT_TYPE: &str = "application/json";
|
||||||
|
|
||||||
pub use client::{ClientConfig, RetryConfig, TimeoutConfig, TlsConfig};
|
pub use client::{ClientConfig, RetryConfig, TimeoutConfig};
|
||||||
pub use db::{RemoteDatabaseOptions, RemoteDatabaseOptionsBuilder};
|
pub use db::{RemoteDatabaseOptions, RemoteDatabaseOptionsBuilder};
|
||||||
|
|||||||
@@ -15,19 +15,6 @@ use crate::remote::retry::{ResolvedRetryConfig, RetryCounter};
|
|||||||
|
|
||||||
const REQUEST_ID_HEADER: HeaderName = HeaderName::from_static("x-request-id");
|
const REQUEST_ID_HEADER: HeaderName = HeaderName::from_static("x-request-id");
|
||||||
|
|
||||||
/// Configuration for TLS/mTLS settings.
|
|
||||||
#[derive(Clone, Debug, Default)]
|
|
||||||
pub struct TlsConfig {
|
|
||||||
/// Path to the client certificate file (PEM format)
|
|
||||||
pub cert_file: Option<String>,
|
|
||||||
/// Path to the client private key file (PEM format)
|
|
||||||
pub key_file: Option<String>,
|
|
||||||
/// Path to the CA certificate file for server verification (PEM format)
|
|
||||||
pub ssl_ca_cert: Option<String>,
|
|
||||||
/// Whether to verify the hostname in the server's certificate
|
|
||||||
pub assert_hostname: bool,
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Configuration for the LanceDB Cloud HTTP client.
|
/// Configuration for the LanceDB Cloud HTTP client.
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
pub struct ClientConfig {
|
pub struct ClientConfig {
|
||||||
@@ -38,11 +25,6 @@ pub struct ClientConfig {
|
|||||||
pub user_agent: String,
|
pub user_agent: String,
|
||||||
// TODO: how to configure request ids?
|
// TODO: how to configure request ids?
|
||||||
pub extra_headers: HashMap<String, String>,
|
pub extra_headers: HashMap<String, String>,
|
||||||
/// The delimiter to use when constructing object identifiers.
|
|
||||||
/// If not default, passes as query parameter.
|
|
||||||
pub id_delimiter: Option<String>,
|
|
||||||
/// TLS configuration for mTLS support
|
|
||||||
pub tls_config: Option<TlsConfig>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for ClientConfig {
|
impl Default for ClientConfig {
|
||||||
@@ -52,8 +34,6 @@ impl Default for ClientConfig {
|
|||||||
retry_config: RetryConfig::default(),
|
retry_config: RetryConfig::default(),
|
||||||
user_agent: concat!("LanceDB-Rust-Client/", env!("CARGO_PKG_VERSION")).into(),
|
user_agent: concat!("LanceDB-Rust-Client/", env!("CARGO_PKG_VERSION")).into(),
|
||||||
extra_headers: HashMap::new(),
|
extra_headers: HashMap::new(),
|
||||||
id_delimiter: None,
|
|
||||||
tls_config: None,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -165,7 +145,6 @@ pub struct RestfulLanceDbClient<S: HttpSend = Sender> {
|
|||||||
host: String,
|
host: String,
|
||||||
pub(crate) retry_config: ResolvedRetryConfig,
|
pub(crate) retry_config: ResolvedRetryConfig,
|
||||||
pub(crate) sender: S,
|
pub(crate) sender: S,
|
||||||
pub(crate) id_delimiter: String,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub trait HttpSend: Clone + Send + Sync + std::fmt::Debug + 'static {
|
pub trait HttpSend: Clone + Send + Sync + std::fmt::Debug + 'static {
|
||||||
@@ -261,49 +240,6 @@ impl RestfulLanceDbClient<Sender> {
         if let Some(timeout) = timeout {
             client_builder = client_builder.timeout(timeout);
         }
-
-        // Configure mTLS if TlsConfig is provided
-        if let Some(tls_config) = &client_config.tls_config {
-            // Load client certificate and key for mTLS
-            if let (Some(cert_file), Some(key_file)) = (&tls_config.cert_file, &tls_config.key_file)
-            {
-                let cert = std::fs::read(cert_file).map_err(|err| Error::Other {
-                    message: format!("Failed to read certificate file: {}", cert_file),
-                    source: Some(Box::new(err)),
-                })?;
-                let key = std::fs::read(key_file).map_err(|err| Error::Other {
-                    message: format!("Failed to read key file: {}", key_file),
-                    source: Some(Box::new(err)),
-                })?;
-
-                let identity = reqwest::Identity::from_pem(&[&cert[..], &key[..]].concat())
-                    .map_err(|err| Error::Other {
-                        message: "Failed to create client identity from certificate and key".into(),
-                        source: Some(Box::new(err)),
-                    })?;
-                client_builder = client_builder.identity(identity);
-            }
-
-            // Load CA certificate for server verification
-            if let Some(ca_cert_file) = &tls_config.ssl_ca_cert {
-                let ca_cert = std::fs::read(ca_cert_file).map_err(|err| Error::Other {
-                    message: format!("Failed to read CA certificate file: {}", ca_cert_file),
-                    source: Some(Box::new(err)),
-                })?;
-
-                let ca_cert =
-                    reqwest::Certificate::from_pem(&ca_cert).map_err(|err| Error::Other {
-                        message: "Failed to create CA certificate from PEM".into(),
-                        source: Some(Box::new(err)),
-                    })?;
-                client_builder = client_builder.add_root_certificate(ca_cert);
-            }
-
-            // Configure hostname verification
-            client_builder =
-                client_builder.danger_accept_invalid_hostnames(!tls_config.assert_hostname);
-        }
-
         let client = client_builder
             .default_headers(Self::default_headers(
                 api_key,
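The hunk above deletes the client-side mTLS wiring. For reference, a standalone sketch of the same reqwest calls, with placeholder paths and simplified error handling; `Identity::from_pem` expects the client certificate and key concatenated into one PEM buffer, and requires a TLS-capable reqwest build:

```rust
use std::error::Error;

// Hedged sketch of the removed mTLS setup; this is not LanceDB API.
fn mtls_client(cert: &str, key: &str, ca: &str) -> Result<reqwest::Client, Box<dyn Error>> {
    // Client identity: certificate followed by private key, one PEM buffer.
    let mut pem = std::fs::read(cert)?;
    pem.extend(std::fs::read(key)?);
    let identity = reqwest::Identity::from_pem(&pem)?;
    // Custom CA used to verify the server.
    let ca_cert = reqwest::Certificate::from_pem(&std::fs::read(ca)?)?;
    Ok(reqwest::Client::builder()
        .identity(identity)
        .add_root_certificate(ca_cert)
        .build()?)
}
```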
@@ -332,7 +268,6 @@ impl RestfulLanceDbClient<Sender> {
             host,
             retry_config,
             sender: Sender,
-            id_delimiter: client_config.id_delimiter.unwrap_or("$".to_string()),
         })
     }
 }
@@ -421,22 +356,12 @@ impl<S: HttpSend> RestfulLanceDbClient<S> {
 
     pub fn get(&self, uri: &str) -> RequestBuilder {
         let full_uri = format!("{}{}", self.host, uri);
-        let builder = self.client.get(full_uri);
-        self.add_id_delimiter_query_param(builder)
+        self.client.get(full_uri)
     }
 
     pub fn post(&self, uri: &str) -> RequestBuilder {
         let full_uri = format!("{}{}", self.host, uri);
-        let builder = self.client.post(full_uri);
-        self.add_id_delimiter_query_param(builder)
-    }
-
-    fn add_id_delimiter_query_param(&self, req: RequestBuilder) -> RequestBuilder {
-        if self.id_delimiter != "$" {
-            req.query(&[("delimiter", self.id_delimiter.clone())])
-        } else {
-            req
-        }
+        self.client.post(full_uri)
     }
 
     pub async fn send(&self, req: RequestBuilder) -> Result<(String, Response)> {
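The removed `add_id_delimiter_query_param` attached the query parameter only when the delimiter differed from the server default `$`. The same conditional-parameter pattern as a standalone sketch (function name, `host`, and endpoint are placeholders):

```rust
// Sketch of the removed conditional query parameter; not LanceDB API.
fn table_list_request(client: &reqwest::Client, host: &str, delim: &str) -> reqwest::RequestBuilder {
    let req = client.get(format!("{}/v1/table/", host));
    if delim != "$" {
        // A non-default delimiter travels with the request.
        req.query(&[("delimiter", delim)])
    } else {
        req
    }
}
```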
@@ -669,7 +594,6 @@ pub mod test_utils {
             sender: MockSender {
                 f: Arc::new(wrapper),
             },
-            id_delimiter: "$".to_string(),
         }
     }
 }
@@ -720,50 +644,4 @@ mod tests {
             Some(Duration::from_secs(120))
         );
     }
-
-    #[test]
-    fn test_tls_config_default() {
-        let config = TlsConfig::default();
-        assert!(config.cert_file.is_none());
-        assert!(config.key_file.is_none());
-        assert!(config.ssl_ca_cert.is_none());
-        assert!(!config.assert_hostname);
-    }
-
-    #[test]
-    fn test_tls_config_with_mtls() {
-        let tls_config = TlsConfig {
-            cert_file: Some("/path/to/cert.pem".to_string()),
-            key_file: Some("/path/to/key.pem".to_string()),
-            ssl_ca_cert: Some("/path/to/ca.pem".to_string()),
-            assert_hostname: true,
-        };
-
-        assert_eq!(tls_config.cert_file, Some("/path/to/cert.pem".to_string()));
-        assert_eq!(tls_config.key_file, Some("/path/to/key.pem".to_string()));
-        assert_eq!(tls_config.ssl_ca_cert, Some("/path/to/ca.pem".to_string()));
-        assert!(tls_config.assert_hostname);
-    }
-
-    #[test]
-    fn test_client_config_with_tls() {
-        let tls_config = TlsConfig {
-            cert_file: Some("/path/to/cert.pem".to_string()),
-            key_file: Some("/path/to/key.pem".to_string()),
-            ssl_ca_cert: None,
-            assert_hostname: false,
-        };
-
-        let client_config = ClientConfig {
-            tls_config: Some(tls_config.clone()),
-            ..Default::default()
-        };
-
-        assert!(client_config.tls_config.is_some());
-        let config_tls = client_config.tls_config.unwrap();
-        assert_eq!(config_tls.cert_file, Some("/path/to/cert.pem".to_string()));
-        assert_eq!(config_tls.key_file, Some("/path/to/key.pem".to_string()));
-        assert!(config_tls.ssl_ca_cert.is_none());
-        assert!(!config_tls.assert_hostname);
-    }
 }
@@ -14,9 +14,8 @@ use serde::Deserialize;
 use tokio::task::spawn_blocking;
 
 use crate::database::{
-    CreateNamespaceRequest, CreateTableData, CreateTableMode, CreateTableRequest, Database,
-    DatabaseOptions, DropNamespaceRequest, ListNamespacesRequest, OpenTableRequest,
-    TableNamesRequest,
+    CreateTableData, CreateTableMode, CreateTableRequest, Database, DatabaseOptions,
+    OpenTableRequest, TableNamesRequest,
 };
 use crate::error::Result;
 use crate::table::BaseTable;
@@ -246,61 +245,10 @@ impl From<&CreateTableMode> for &'static str {
     }
 }
 
-fn build_table_identifier(name: &str, namespace: &[String], delimiter: &str) -> String {
-    if !namespace.is_empty() {
-        let mut parts = namespace.to_vec();
-        parts.push(name.to_string());
-        parts.join(delimiter)
-    } else {
-        name.to_string()
-    }
-}
-
-fn build_namespace_identifier(namespace: &[String], delimiter: &str) -> String {
-    if namespace.is_empty() {
-        // According to the namespace spec, use delimiter to represent root namespace
-        delimiter.to_string()
-    } else {
-        namespace.join(delimiter)
-    }
-}
-
-/// Build a secure cache key using length prefixes.
-/// This format is completely unambiguous regardless of delimiter or content.
-/// Format: [u32_len][namespace1][u32_len][namespace2]...[u32_len][table_name]
-/// Returns a hex-encoded string for use as a cache key.
-fn build_cache_key(name: &str, namespace: &[String]) -> String {
-    let mut key = Vec::new();
-
-    // Add each namespace component with length prefix
-    for ns in namespace {
-        let bytes = ns.as_bytes();
-        key.extend_from_slice(&(bytes.len() as u32).to_le_bytes());
-        key.extend_from_slice(bytes);
-    }
-
-    // Add table name with length prefix
-    let name_bytes = name.as_bytes();
-    key.extend_from_slice(&(name_bytes.len() as u32).to_le_bytes());
-    key.extend_from_slice(name_bytes);
-
-    // Convert to hex string for use as a cache key
-    key.iter().map(|b| format!("{:02x}", b)).collect()
-}
-
 #[async_trait]
 impl<S: HttpSend> Database for RemoteDatabase<S> {
     async fn table_names(&self, request: TableNamesRequest) -> Result<Vec<String>> {
-        let mut req = if !request.namespace.is_empty() {
-            let namespace_id =
-                build_namespace_identifier(&request.namespace, &self.client.id_delimiter);
-            self.client
-                .get(&format!("/v1/namespace/{}/table/list", namespace_id))
-        } else {
-            // TODO: use new API for all listing operations once stable
-            self.client.get("/v1/table/")
-        };
-
+        let mut req = self.client.get("/v1/table/");
         if let Some(limit) = request.limit {
             req = req.query(&[("limit", limit)]);
         }
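The deleted `build_cache_key` is the interesting helper here: length-prefixing each component makes the key immune to delimiter games. A worked example reproducing its encoding (values checked by hand; this is a sketch, not the crate's API):

```rust
// Reproduction of the removed length-prefixed encoding.
fn cache_key(name: &str, namespace: &[String]) -> String {
    let mut key = Vec::new();
    for part in namespace.iter().map(String::as_str).chain([name]) {
        key.extend_from_slice(&(part.len() as u32).to_le_bytes());
        key.extend_from_slice(part.as_bytes());
    }
    key.iter().map(|b| format!("{:02x}", b)).collect()
}

fn main() {
    // "ns" encodes as 02 00 00 00 6e 73, then "t" as 01 00 00 00 74.
    assert_eq!(cache_key("t", &["ns".to_string()]), "020000006e730100000074");
    // No delimiter choice can make distinct structures collide:
    assert_ne!(
        cache_key("t", &["a$b".to_string()]),
        cache_key("t", &["a".to_string(), "b".to_string()])
    );
}
```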
@@ -316,17 +264,12 @@ impl<S: HttpSend> Database for RemoteDatabase<S> {
             .err_to_http(request_id)?
             .tables;
         for table in &tables {
-            let table_identifier =
-                build_table_identifier(table, &request.namespace, &self.client.id_delimiter);
-            let cache_key = build_cache_key(table, &request.namespace);
             let remote_table = Arc::new(RemoteTable::new(
                 self.client.clone(),
                 table.clone(),
-                request.namespace.clone(),
-                table_identifier.clone(),
                 version.clone(),
             ));
-            self.table_cache.insert(cache_key, remote_table).await;
+            self.table_cache.insert(table.clone(), remote_table).await;
         }
         Ok(tables)
     }
@@ -352,11 +295,9 @@ impl<S: HttpSend> Database for RemoteDatabase<S> {
             .await
             .unwrap()?;
 
-        let identifier =
-            build_table_identifier(&request.name, &request.namespace, &self.client.id_delimiter);
         let req = self
             .client
-            .post(&format!("/v1/table/{}/create/", identifier))
+            .post(&format!("/v1/table/{}/create/", request.name))
             .query(&[("mode", Into::<&str>::into(&request.mode))])
             .body(data_buffer)
             .header(CONTENT_TYPE, ARROW_STREAM_CONTENT_TYPE);
@@ -373,7 +314,6 @@ impl<S: HttpSend> Database for RemoteDatabase<S> {
             CreateTableMode::ExistOk(callback) => {
                 let req = OpenTableRequest {
                     name: request.name.clone(),
-                    namespace: request.namespace.clone(),
                     index_cache_size: None,
                     lance_read_params: None,
                 };
@@ -402,160 +342,70 @@ impl<S: HttpSend> Database for RemoteDatabase<S> {
         }
         let rsp = self.client.check_response(&request_id, rsp).await?;
         let version = parse_server_version(&request_id, &rsp)?;
-        let table_identifier =
-            build_table_identifier(&request.name, &request.namespace, &self.client.id_delimiter);
-        let cache_key = build_cache_key(&request.name, &request.namespace);
         let table = Arc::new(RemoteTable::new(
             self.client.clone(),
             request.name.clone(),
-            request.namespace.clone(),
-            table_identifier,
             version,
         ));
-        self.table_cache.insert(cache_key, table.clone()).await;
+        self.table_cache
+            .insert(request.name.clone(), table.clone())
+            .await;
 
         Ok(table)
     }
 
     async fn open_table(&self, request: OpenTableRequest) -> Result<Arc<dyn BaseTable>> {
-        let identifier =
-            build_table_identifier(&request.name, &request.namespace, &self.client.id_delimiter);
-        let cache_key = build_cache_key(&request.name, &request.namespace);
-
         // We describe the table to confirm it exists before moving on.
-        if let Some(table) = self.table_cache.get(&cache_key).await {
+        if let Some(table) = self.table_cache.get(&request.name).await {
             Ok(table.clone())
         } else {
             let req = self
                 .client
-                .post(&format!("/v1/table/{}/describe/", identifier));
+                .post(&format!("/v1/table/{}/describe/", request.name));
             let (request_id, rsp) = self.client.send_with_retry(req, None, true).await?;
             if rsp.status() == StatusCode::NOT_FOUND {
-                return Err(crate::Error::TableNotFound {
-                    name: identifier.clone(),
-                });
+                return Err(crate::Error::TableNotFound { name: request.name });
             }
             let rsp = self.client.check_response(&request_id, rsp).await?;
             let version = parse_server_version(&request_id, &rsp)?;
-            let table_identifier = build_table_identifier(
-                &request.name,
-                &request.namespace,
-                &self.client.id_delimiter,
-            );
             let table = Arc::new(RemoteTable::new(
                 self.client.clone(),
                 request.name.clone(),
-                request.namespace.clone(),
-                table_identifier,
                 version,
             ));
-            let cache_key = build_cache_key(&request.name, &request.namespace);
-            self.table_cache.insert(cache_key, table.clone()).await;
+            self.table_cache.insert(request.name, table.clone()).await;
             Ok(table)
         }
     }
 
-    async fn rename_table(
-        &self,
-        current_name: &str,
-        new_name: &str,
-        cur_namespace: &[String],
-        new_namespace: &[String],
-    ) -> Result<()> {
-        let current_identifier =
-            build_table_identifier(current_name, cur_namespace, &self.client.id_delimiter);
-        let current_cache_key = build_cache_key(current_name, cur_namespace);
-        let new_cache_key = build_cache_key(new_name, new_namespace);
-
-        let mut body = serde_json::json!({ "new_table_name": new_name });
-        if !new_namespace.is_empty() {
-            body["new_namespace"] = serde_json::Value::Array(
-                new_namespace
-                    .iter()
-                    .map(|s| serde_json::Value::String(s.clone()))
-                    .collect(),
-            );
-        }
+    async fn rename_table(&self, current_name: &str, new_name: &str) -> Result<()> {
         let req = self
             .client
-            .post(&format!("/v1/table/{}/rename/", current_identifier))
-            .json(&body);
+            .post(&format!("/v1/table/{}/rename/", current_name));
+        let req = req.json(&serde_json::json!({ "new_table_name": new_name }));
         let (request_id, resp) = self.client.send(req).await?;
         self.client.check_response(&request_id, resp).await?;
-        let table = self.table_cache.remove(&current_cache_key).await;
+        let table = self.table_cache.remove(current_name).await;
         if let Some(table) = table {
-            self.table_cache.insert(new_cache_key, table).await;
+            self.table_cache.insert(new_name.into(), table).await;
         }
         Ok(())
     }
 
-    async fn drop_table(&self, name: &str, namespace: &[String]) -> Result<()> {
-        let identifier = build_table_identifier(name, namespace, &self.client.id_delimiter);
-        let cache_key = build_cache_key(name, namespace);
-        let req = self.client.post(&format!("/v1/table/{}/drop/", identifier));
+    async fn drop_table(&self, name: &str) -> Result<()> {
+        let req = self.client.post(&format!("/v1/table/{}/drop/", name));
         let (request_id, resp) = self.client.send(req).await?;
         self.client.check_response(&request_id, resp).await?;
-        self.table_cache.remove(&cache_key).await;
+        self.table_cache.remove(name).await;
         Ok(())
     }
 
-    async fn drop_all_tables(&self, namespace: &[String]) -> Result<()> {
-        // TODO: Implement namespace-aware drop_all_tables
-        let _namespace = namespace; // Suppress unused warning for now
+    async fn drop_all_tables(&self) -> Result<()> {
         Err(crate::Error::NotSupported {
-            message: "Dropping all tables is not currently supported in the remote API".to_string(),
+            message: "Dropping databases is not supported in the remote API".to_string(),
         })
     }
 
-    async fn list_namespaces(&self, request: ListNamespacesRequest) -> Result<Vec<String>> {
-        let namespace_id =
-            build_namespace_identifier(request.namespace.as_slice(), &self.client.id_delimiter);
-        let mut req = self
-            .client
-            .get(&format!("/v1/namespace/{}/list", namespace_id));
-        if let Some(limit) = request.limit {
-            req = req.query(&[("limit", limit)]);
-        }
-        if let Some(page_token) = request.page_token {
-            req = req.query(&[("page_token", page_token)]);
-        }
-
-        let (request_id, resp) = self.client.send(req).await?;
-        let resp = self.client.check_response(&request_id, resp).await?;
-
-        #[derive(Deserialize)]
-        struct ListNamespacesResponse {
-            namespaces: Vec<String>,
-        }
-
-        let parsed: ListNamespacesResponse = resp.json().await.map_err(|e| Error::Runtime {
-            message: format!("Failed to parse namespace response: {}", e),
-        })?;
-        Ok(parsed.namespaces)
-    }
-
-    async fn create_namespace(&self, request: CreateNamespaceRequest) -> Result<()> {
-        let namespace_id =
-            build_namespace_identifier(request.namespace.as_slice(), &self.client.id_delimiter);
-        let req = self
-            .client
-            .post(&format!("/v1/namespace/{}/create", namespace_id));
-        let (request_id, resp) = self.client.send(req).await?;
-        self.client.check_response(&request_id, resp).await?;
-        Ok(())
-    }
-
-    async fn drop_namespace(&self, request: DropNamespaceRequest) -> Result<()> {
-        let namespace_id =
-            build_namespace_identifier(request.namespace.as_slice(), &self.client.id_delimiter);
-        let req = self
-            .client
-            .post(&format!("/v1/namespace/{}/drop", namespace_id));
-        let (request_id, resp) = self.client.send(req).await?;
-        self.client.check_response(&request_id, resp).await?;
-        Ok(())
-    }
-
     fn as_any(&self) -> &dyn std::any::Any {
         self
     }
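Among the deletions above, `rename_table` sent `new_namespace` only for cross-namespace moves. The request body shape, as a hedged sketch (field names taken from the diff, helper name invented here):

```rust
// Sketch of the removed rename payload construction.
fn rename_body(new_name: &str, new_namespace: &[String]) -> serde_json::Value {
    let mut body = serde_json::json!({ "new_table_name": new_name });
    if !new_namespace.is_empty() {
        // Present only when the table moves out of the root namespace.
        body["new_namespace"] = serde_json::json!(new_namespace);
    }
    body
}
```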
@@ -586,7 +436,6 @@ impl From<StorageOptions> for RemoteOptions {
 
 #[cfg(test)]
 mod tests {
-    use super::build_cache_key;
     use std::sync::{Arc, OnceLock};
 
     use arrow_array::{Int32Array, RecordBatch, RecordBatchIterator};
@@ -599,38 +448,6 @@ mod tests {
         Connection, Error,
     };
 
-    #[test]
-    fn test_cache_key_security() {
-        // Test that cache keys are unique regardless of delimiter manipulation
-
-        // Case 1: Different delimiters should not affect cache key
-        let key1 = build_cache_key("table1", &["ns1".to_string(), "ns2".to_string()]);
-        let key2 = build_cache_key("table1", &["ns1$ns2".to_string()]);
-        assert_ne!(
-            key1, key2,
-            "Cache keys should differ for different namespace structures"
-        );
-
-        // Case 2: Table name containing delimiter should not cause collision
-        let key3 = build_cache_key("ns2$table1", &["ns1".to_string()]);
-        assert_ne!(
-            key1, key3,
-            "Cache key should be different when table name contains delimiter"
-        );
-
-        // Case 3: Empty namespace vs namespace with empty string
-        let key4 = build_cache_key("table1", &[]);
-        let key5 = build_cache_key("table1", &["".to_string()]);
-        assert_ne!(
-            key4, key5,
-            "Empty namespace should differ from namespace with empty string"
-        );
-
-        // Case 4: Verify same inputs produce same key (consistency)
-        let key6 = build_cache_key("table1", &["ns1".to_string(), "ns2".to_string()]);
-        assert_eq!(key1, key6, "Same inputs should produce same cache key");
-    }
-
     #[tokio::test]
     async fn test_retries() {
         // We'll record the request_id here, to check it matches the one in the error.
@@ -894,7 +711,7 @@ mod tests {
 
             http::Response::builder().status(200).body("").unwrap()
         });
-        conn.drop_table("table1", &[]).await.unwrap();
+        conn.drop_table("table1").await.unwrap();
         // NOTE: the API will return 200 even if the table does not exist. So we shouldn't expect 404.
     }
 
@@ -914,9 +731,7 @@ mod tests {
 
             http::Response::builder().status(200).body("").unwrap()
         });
-        conn.rename_table("table1", "table2", &[], &[])
-            .await
-            .unwrap();
+        conn.rename_table("table1", "table2").await.unwrap();
     }
 
     #[tokio::test]
@@ -930,186 +745,4 @@ mod tests {
             .await
             .unwrap();
     }
-
-    #[tokio::test]
-    async fn test_table_names_with_root_namespace() {
-        // When namespace is empty (root namespace), should use /v1/table/ for backwards compatibility
-        let conn = Connection::new_with_handler(|request| {
-            assert_eq!(request.method(), &reqwest::Method::GET);
-            assert_eq!(request.url().path(), "/v1/table/");
-            assert_eq!(request.url().query(), None);
-
-            http::Response::builder()
-                .status(200)
-                .body(r#"{"tables": ["table1", "table2"]}"#)
-                .unwrap()
-        });
-        let names = conn
-            .table_names()
-            .namespace(vec![])
-            .execute()
-            .await
-            .unwrap();
-        assert_eq!(names, vec!["table1", "table2"]);
-    }
-
-    #[tokio::test]
-    async fn test_table_names_with_namespace() {
-        // When namespace is non-empty, should use /v1/namespace/{id}/table/list
-        let conn = Connection::new_with_handler(|request| {
-            assert_eq!(request.method(), &reqwest::Method::GET);
-            assert_eq!(request.url().path(), "/v1/namespace/test/table/list");
-            assert_eq!(request.url().query(), None);
-
-            http::Response::builder()
-                .status(200)
-                .body(r#"{"tables": ["table1", "table2"]}"#)
-                .unwrap()
-        });
-        let names = conn
-            .table_names()
-            .namespace(vec!["test".to_string()])
-            .execute()
-            .await
-            .unwrap();
-        assert_eq!(names, vec!["table1", "table2"]);
-    }
-
-    #[tokio::test]
-    async fn test_table_names_with_nested_namespace() {
-        // When namespace is vec!["ns1", "ns2"], should use /v1/namespace/ns1$ns2/table/list
-        let conn = Connection::new_with_handler(|request| {
-            assert_eq!(request.method(), &reqwest::Method::GET);
-            assert_eq!(request.url().path(), "/v1/namespace/ns1$ns2/table/list");
-            assert_eq!(request.url().query(), None);
-
-            http::Response::builder()
-                .status(200)
-                .body(r#"{"tables": ["ns1$ns2$table1", "ns1$ns2$table2"]}"#)
-                .unwrap()
-        });
-        let names = conn
-            .table_names()
-            .namespace(vec!["ns1".to_string(), "ns2".to_string()])
-            .execute()
-            .await
-            .unwrap();
-        assert_eq!(names, vec!["ns1$ns2$table1", "ns1$ns2$table2"]);
-    }
-
-    #[tokio::test]
-    async fn test_open_table_with_namespace() {
-        let conn = Connection::new_with_handler(|request| {
-            assert_eq!(request.method(), &reqwest::Method::POST);
-            assert_eq!(request.url().path(), "/v1/table/ns1$ns2$table1/describe/");
-            assert_eq!(request.url().query(), None);
-
-            http::Response::builder()
-                .status(200)
-                .body(r#"{"table": "table1"}"#)
-                .unwrap()
-        });
-        let table = conn
-            .open_table("table1")
-            .namespace(vec!["ns1".to_string(), "ns2".to_string()])
-            .execute()
-            .await
-            .unwrap();
-        assert_eq!(table.name(), "table1");
-    }
-
-    #[tokio::test]
-    async fn test_create_table_with_namespace() {
-        let conn = Connection::new_with_handler(|request| {
-            assert_eq!(request.method(), &reqwest::Method::POST);
-            assert_eq!(request.url().path(), "/v1/table/ns1$table1/create/");
-            assert_eq!(
-                request
-                    .headers()
-                    .get(reqwest::header::CONTENT_TYPE)
-                    .unwrap(),
-                ARROW_STREAM_CONTENT_TYPE.as_bytes()
-            );
-
-            http::Response::builder().status(200).body("").unwrap()
-        });
-        let data = RecordBatch::try_new(
-            Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)])),
-            vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
-        )
-        .unwrap();
-        let reader = RecordBatchIterator::new([Ok(data.clone())], data.schema());
-        let table = conn
-            .create_table("table1", reader)
-            .namespace(vec!["ns1".to_string()])
-            .execute()
-            .await
-            .unwrap();
-        assert_eq!(table.name(), "table1");
-    }
-
-    #[tokio::test]
-    async fn test_drop_table_with_namespace() {
-        let conn = Connection::new_with_handler(|request| {
-            assert_eq!(request.method(), &reqwest::Method::POST);
-            assert_eq!(request.url().path(), "/v1/table/ns1$ns2$table1/drop/");
-            assert_eq!(request.url().query(), None);
-            assert!(request.body().is_none());
-
-            http::Response::builder().status(200).body("").unwrap()
-        });
-        conn.drop_table("table1", &["ns1".to_string(), "ns2".to_string()])
-            .await
-            .unwrap();
-    }
-
-    #[tokio::test]
-    async fn test_rename_table_with_namespace() {
-        let conn = Connection::new_with_handler(|request| {
-            assert_eq!(request.method(), &reqwest::Method::POST);
-            assert_eq!(request.url().path(), "/v1/table/ns1$table1/rename/");
-            assert_eq!(
-                request.headers().get("Content-Type").unwrap(),
-                JSON_CONTENT_TYPE
-            );
-
-            let body = request.body().unwrap().as_bytes().unwrap();
-            let body: serde_json::Value = serde_json::from_slice(body).unwrap();
-            assert_eq!(body["new_table_name"], "table2");
-            assert_eq!(body["new_namespace"], serde_json::json!(["ns2"]));
-
-            http::Response::builder().status(200).body("").unwrap()
-        });
-        conn.rename_table(
-            "table1",
-            "table2",
-            &["ns1".to_string()],
-            &["ns2".to_string()],
-        )
-        .await
-        .unwrap();
-    }
-
-    #[tokio::test]
-    async fn test_create_empty_table_with_namespace() {
-        let conn = Connection::new_with_handler(|request| {
-            assert_eq!(request.method(), &reqwest::Method::POST);
-            assert_eq!(request.url().path(), "/v1/table/prod$data$metrics/create/");
-            assert_eq!(
-                request
-                    .headers()
-                    .get(reqwest::header::CONTENT_TYPE)
-                    .unwrap(),
-                ARROW_STREAM_CONTENT_TYPE.as_bytes()
-            );
-
-            http::Response::builder().status(200).body("").unwrap()
-        });
-        let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)]));
-        conn.create_empty_table("metrics", schema)
-            .namespace(vec!["prod".to_string(), "data".to_string()])
-            .execute()
-            .await
-            .unwrap();
-    }
 }
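Every hunk in the remaining file makes the same swap: the namespace-qualified identifier (for example "ns1$ns2$table1") is dropped from endpoint paths in favor of the bare table name. The construction being removed, as an illustrative sketch (helper name invented here):

```rust
// Illustrative only: the 0.22 side interpolated the full identifier
// into every per-table endpoint path.
fn table_endpoint(identifier: &str, op: &str) -> String {
    format!("/v1/table/{}/{}/", identifier, op)
}

fn main() {
    assert_eq!(
        table_endpoint("ns1$ns2$table1", "describe"),
        "/v1/table/ns1$ns2$table1/describe/"
    );
}
```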
@@ -70,7 +70,7 @@ impl<S: HttpSend + 'static> Tags for RemoteTags<'_, S> {
         let request = self
             .inner
             .client
-            .post(&format!("/v1/table/{}/tags/list/", self.inner.identifier));
+            .post(&format!("/v1/table/{}/tags/list/", self.inner.name));
         let (request_id, response) = self.inner.send(request, true).await?;
         let response = self
             .inner
@@ -104,10 +104,7 @@ impl<S: HttpSend + 'static> Tags for RemoteTags<'_, S> {
         let request = self
             .inner
             .client
-            .post(&format!(
-                "/v1/table/{}/tags/version/",
-                self.inner.identifier
-            ))
+            .post(&format!("/v1/table/{}/tags/version/", self.inner.name))
             .json(&serde_json::json!({ "tag": tag }));
 
         let (request_id, response) = self.inner.send(request, true).await?;
@@ -149,7 +146,7 @@ impl<S: HttpSend + 'static> Tags for RemoteTags<'_, S> {
         let request = self
             .inner
             .client
-            .post(&format!("/v1/table/{}/tags/create/", self.inner.identifier))
+            .post(&format!("/v1/table/{}/tags/create/", self.inner.name))
             .json(&serde_json::json!({
                 "tag": tag,
                 "version": version
@@ -166,7 +163,7 @@ impl<S: HttpSend + 'static> Tags for RemoteTags<'_, S> {
         let request = self
             .inner
             .client
-            .post(&format!("/v1/table/{}/tags/delete/", self.inner.identifier))
+            .post(&format!("/v1/table/{}/tags/delete/", self.inner.name))
             .json(&serde_json::json!({ "tag": tag }));
 
         let (request_id, response) = self.inner.send(request, true).await?;
@@ -180,7 +177,7 @@ impl<S: HttpSend + 'static> Tags for RemoteTags<'_, S> {
         let request = self
             .inner
             .client
-            .post(&format!("/v1/table/{}/tags/update/", self.inner.identifier))
+            .post(&format!("/v1/table/{}/tags/update/", self.inner.name))
             .json(&serde_json::json!({
                 "tag": tag,
                 "version": version
@@ -199,8 +196,6 @@ pub struct RemoteTable<S: HttpSend = Sender> {
     #[allow(dead_code)]
     client: RestfulLanceDbClient<S>,
     name: String,
-    namespace: Vec<String>,
-    identifier: String,
     server_version: ServerVersion,
 
     version: RwLock<Option<u64>>,
@@ -210,15 +205,11 @@ impl<S: HttpSend> RemoteTable<S> {
     pub fn new(
         client: RestfulLanceDbClient<S>,
         name: String,
-        namespace: Vec<String>,
-        identifier: String,
         server_version: ServerVersion,
     ) -> Self {
         Self {
             client,
             name,
-            namespace,
-            identifier,
             server_version,
             version: RwLock::new(None),
         }
@@ -232,7 +223,7 @@ impl<S: HttpSend> RemoteTable<S> {
     async fn describe_version(&self, version: Option<u64>) -> Result<TableDescription> {
         let mut request = self
             .client
-            .post(&format!("/v1/table/{}/describe/", self.identifier));
+            .post(&format!("/v1/table/{}/describe/", self.name));
 
         let body = serde_json::json!({ "version": version });
         request = request.json(&body);
@@ -343,7 +334,7 @@ impl<S: HttpSend> RemoteTable<S> {
     ) -> Result<reqwest::Response> {
         if response.status() == StatusCode::NOT_FOUND {
             return Err(Error::TableNotFound {
-                name: self.identifier.clone(),
+                name: self.name.clone(),
             });
         }
 
@@ -557,9 +548,7 @@ impl<S: HttpSend> RemoteTable<S> {
         query: &AnyQuery,
         options: &QueryExecutionOptions,
     ) -> Result<Vec<Pin<Box<dyn RecordBatchStream + Send>>>> {
-        let mut request = self
-            .client
-            .post(&format!("/v1/table/{}/query/", self.identifier));
+        let mut request = self.client.post(&format!("/v1/table/{}/query/", self.name));
 
         if let Some(timeout) = options.timeout {
             // Also send to server, so it can abort the query if it takes too long.
@@ -626,7 +615,7 @@ struct TableDescription {
 
 impl<S: HttpSend> std::fmt::Display for RemoteTable<S> {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "RemoteTable({})", self.identifier)
+        write!(f, "RemoteTable({})", self.name)
     }
 }
 
@@ -645,9 +634,7 @@ mod test_utils {
         let client = client_with_handler(handler);
         Self {
             client,
-            name: name.clone(),
-            namespace: vec![],
-            identifier: name,
+            name,
             server_version: version.map(ServerVersion).unwrap_or_default(),
             version: RwLock::new(None),
         }
@@ -663,14 +650,6 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
     fn name(&self) -> &str {
         &self.name
     }
 
-    fn namespace(&self) -> &[String] {
-        &self.namespace
-    }
-
-    fn id(&self) -> &str {
-        &self.identifier
-    }
-
     async fn version(&self) -> Result<u64> {
         self.describe().await.map(|desc| desc.version)
     }
@@ -699,7 +678,7 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
     async fn restore(&self) -> Result<()> {
         let mut request = self
             .client
-            .post(&format!("/v1/table/{}/restore/", self.identifier));
+            .post(&format!("/v1/table/{}/restore/", self.name));
         let version = self.current_version().await;
         let body = serde_json::json!({ "version": version });
         request = request.json(&body);
@@ -713,7 +692,7 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
     async fn list_versions(&self) -> Result<Vec<Version>> {
         let request = self
             .client
-            .post(&format!("/v1/table/{}/version/list/", self.identifier));
+            .post(&format!("/v1/table/{}/version/list/", self.name));
         let (request_id, response) = self.send(request, true).await?;
         let response = self.check_table_response(&request_id, response).await?;
 
@@ -744,7 +723,7 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
     async fn count_rows(&self, filter: Option<Filter>) -> Result<usize> {
         let mut request = self
             .client
-            .post(&format!("/v1/table/{}/count_rows/", self.identifier));
+            .post(&format!("/v1/table/{}/count_rows/", self.name));
 
         let version = self.current_version().await;
 
@@ -780,7 +759,7 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
         self.check_mutable().await?;
         let mut request = self
             .client
-            .post(&format!("/v1/table/{}/insert/", self.identifier))
+            .post(&format!("/v1/table/{}/insert/", self.name))
             .header(CONTENT_TYPE, ARROW_STREAM_CONTENT_TYPE);
 
         match add.mode {
@@ -852,7 +831,7 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
     async fn explain_plan(&self, query: &AnyQuery, verbose: bool) -> Result<String> {
         let base_request = self
             .client
-            .post(&format!("/v1/table/{}/explain_plan/", self.identifier));
+            .post(&format!("/v1/table/{}/explain_plan/", self.name));
 
         let query_bodies = self.prepare_query_bodies(query).await?;
         let requests: Vec<reqwest::RequestBuilder> = query_bodies
@@ -901,7 +880,7 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
     ) -> Result<String> {
         let request = self
             .client
-            .post(&format!("/v1/table/{}/analyze_plan/", self.identifier));
+            .post(&format!("/v1/table/{}/analyze_plan/", self.name));
 
         let query_bodies = self.prepare_query_bodies(query).await?;
         let requests: Vec<reqwest::RequestBuilder> = query_bodies
@@ -940,7 +919,7 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
         self.check_mutable().await?;
         let request = self
             .client
-            .post(&format!("/v1/table/{}/update/", self.identifier));
+            .post(&format!("/v1/table/{}/update/", self.name));
 
         let mut updates = Vec::new();
         for (column, expression) in update.columns {
@@ -979,7 +958,7 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
         let body = serde_json::json!({ "predicate": predicate });
         let request = self
             .client
-            .post(&format!("/v1/table/{}/delete/", self.identifier))
+            .post(&format!("/v1/table/{}/delete/", self.name))
             .json(&body);
         let (request_id, response) = self.send(request, true).await?;
         let response = self.check_table_response(&request_id, response).await?;
@@ -1001,7 +980,7 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
         self.check_mutable().await?;
         let request = self
             .client
-            .post(&format!("/v1/table/{}/create_index/", self.identifier));
+            .post(&format!("/v1/table/{}/create_index/", self.name));
 
         let column = match index.columns.len() {
             0 => {
@@ -1020,18 +999,6 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
             "column": column
         });
 
-        // Add name parameter if provided (for backwards compatibility, only include if Some)
-        if let Some(ref name) = index.name {
-            body["name"] = serde_json::Value::String(name.clone());
-        }
-
-        // Warn if train=false is specified since it's not meaningful
-        if !index.train {
-            log::warn!(
-                "train=false has no effect remote tables. The index will be created empty and automatically populated in the background."
-            );
-        }
-
         match index.index {
             // TODO: Should we pass the actual index parameters? SaaS does not
             // yet support them.
@@ -1117,8 +1084,8 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
         self.check_table_response(&request_id, response).await?;
 
         if let Some(wait_timeout) = index.wait_timeout {
-            let index_name = index.name.unwrap_or_else(|| format!("{}_idx", column));
-            self.wait_for_index(&[&index_name], wait_timeout).await?;
+            let name = format!("{}_idx", column);
+            self.wait_for_index(&[&name], wait_timeout).await?;
         }
 
         Ok(())
@@ -1142,7 +1109,7 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
         let query = MergeInsertRequest::try_from(params)?;
         let mut request = self
             .client
-            .post(&format!("/v1/table/{}/merge_insert/", self.identifier))
+            .post(&format!("/v1/table/{}/merge_insert/", self.name))
             .query(&query)
             .header(CONTENT_TYPE, ARROW_STREAM_CONTENT_TYPE);
 
@@ -1214,7 +1181,7 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
         let body = serde_json::json!({ "new_columns": body });
         let request = self
             .client
-            .post(&format!("/v1/table/{}/add_columns/", self.identifier))
+            .post(&format!("/v1/table/{}/add_columns/", self.name))
             .json(&body);
         let (request_id, response) = self.send(request, true).await?;
         let response = self.check_table_response(&request_id, response).await?;
@@ -1267,7 +1234,7 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
         let body = serde_json::json!({ "alterations": body });
         let request = self
             .client
-            .post(&format!("/v1/table/{}/alter_columns/", self.identifier))
+            .post(&format!("/v1/table/{}/alter_columns/", self.name))
            .json(&body);
         let (request_id, response) = self.send(request, true).await?;
         let response = self.check_table_response(&request_id, response).await?;
@@ -1292,7 +1259,7 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
         let body = serde_json::json!({ "columns": columns });
         let request = self
             .client
-            .post(&format!("/v1/table/{}/drop_columns/", self.identifier))
+            .post(&format!("/v1/table/{}/drop_columns/", self.name))
             .json(&body);
         let (request_id, response) = self.send(request, true).await?;
         let response = self.check_table_response(&request_id, response).await?;
@@ -1316,7 +1283,7 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
         // Make request to list the indices
         let mut request = self
             .client
-            .post(&format!("/v1/table/{}/index/list/", self.identifier));
+            .post(&format!("/v1/table/{}/index/list/", self.name));
         let version = self.current_version().await;
         let body = serde_json::json!({ "version": version });
         request = request.json(&body);
@@ -1372,7 +1339,7 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
     async fn index_stats(&self, index_name: &str) -> Result<Option<IndexStatistics>> {
         let mut request = self.client.post(&format!(
             "/v1/table/{}/index/{}/stats/",
-            self.identifier, index_name
+            self.name, index_name
         ));
         let version = self.current_version().await;
         let body = serde_json::json!({ "version": version });
@@ -1400,7 +1367,7 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
     async fn drop_index(&self, index_name: &str) -> Result<()> {
         let request = self.client.post(&format!(
             "/v1/table/{}/index/{}/drop/",
-            self.identifier, index_name
+            self.name, index_name
        ));
         let (request_id, response) = self.send(request, true).await?;
         if response.status() == StatusCode::NOT_FOUND {
@@ -1428,9 +1395,7 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
     }
 
     async fn stats(&self) -> Result<TableStatistics> {
-        let request = self
-            .client
-            .post(&format!("/v1/table/{}/stats/", self.identifier));
+        let request = self.client.post(&format!("/v1/table/{}/stats/", self.name));
         let (request_id, response) = self.send(request, true).await?;
         let response = self.check_table_response(&request_id, response).await?;
         let body = response.text().await.err_to_http(request_id.clone())?;
@@ -3105,174 +3070,4 @@ mod tests {
|
|||||||
});
|
});
|
||||||
table
|
table
|
||||||
}
|
}
|
||||||
|
|
||||||
#[tokio::test]
|
|
||||||
async fn test_table_with_namespace_identifier() {
|
|
||||||
// Test that a table created with namespace uses the correct identifier in API calls
|
|
||||||
let table = Table::new_with_handler("ns1$ns2$table1", |request| {
|
|
||||||
assert_eq!(request.method(), "POST");
|
|
||||||
// All API calls should use the full identifier in the path
|
|
||||||
assert_eq!(request.url().path(), "/v1/table/ns1$ns2$table1/describe/");
|
|
||||||
|
|
||||||
http::Response::builder()
|
|
||||||
.status(200)
|
|
||||||
.body(r#"{"version": 1, "schema": { "fields": [] }}"#)
|
|
||||||
.unwrap()
|
|
||||||
});
|
|
||||||
|
|
||||||
// The name() method should return just the base name, not the full identifier
|
|
||||||
assert_eq!(table.name(), "ns1$ns2$table1");
|
|
||||||
|
|
||||||
// API operations should work correctly
|
|
||||||
let version = table.version().await.unwrap();
|
|
||||||
assert_eq!(version, 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[tokio::test]
|
|
||||||
async fn test_query_with_namespace() {
|
|
||||||
let table = Table::new_with_handler("analytics$events", |request| {
|
|
||||||
match request.url().path() {
|
|
||||||
"/v1/table/analytics$events/query/" => {
|
|
||||||
assert_eq!(request.method(), "POST");
|
|
||||||
|
|
||||||
// Return empty arrow stream
|
|
||||||
let data = RecordBatch::try_new(
|
|
||||||
Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)])),
|
|
||||||
vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
|
|
||||||
)
|
|
||||||
.unwrap();
|
|
||||||
let body = write_ipc_file(&data);
|
|
||||||
|
|
||||||
http::Response::builder()
|
|
||||||
.status(200)
|
|
||||||
.header("Content-Type", ARROW_FILE_CONTENT_TYPE)
|
|
||||||
.body(body)
|
|
||||||
.unwrap()
|
|
||||||
}
|
|
||||||
_ => {
|
|
||||||
panic!("Unexpected path: {}", request.url().path());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
let results = table.query().execute().await.unwrap();
|
|
||||||
let batches = results.try_collect::<Vec<_>>().await.unwrap();
|
|
||||||
assert_eq!(batches.len(), 1);
|
|
||||||
assert_eq!(batches[0].num_rows(), 3);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[tokio::test]
|
|
||||||
async fn test_add_data_with_namespace() {
|
|
||||||
let data = RecordBatch::try_new(
|
|
||||||
Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)])),
|
|
||||||
vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
|
|
||||||
)
|
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
let (sender, receiver) = std::sync::mpsc::channel();
|
|
||||||
let table = Table::new_with_handler("prod$metrics", move |mut request| {
|
|
||||||
if request.url().path() == "/v1/table/prod$metrics/insert/" {
|
|
||||||
assert_eq!(request.method(), "POST");
|
|
||||||
assert_eq!(
|
|
||||||
request.headers().get("Content-Type").unwrap(),
|
|
||||||
ARROW_STREAM_CONTENT_TYPE
|
|
||||||
);
|
|
||||||
let mut body_out = reqwest::Body::from(Vec::new());
|
|
||||||
std::mem::swap(request.body_mut().as_mut().unwrap(), &mut body_out);
|
|
||||||
sender.send(body_out).unwrap();
|
|
||||||
http::Response::builder()
|
|
||||||
.status(200)
|
|
||||||
.body(r#"{"version": 2}"#)
|
|
||||||
.unwrap()
|
|
||||||
} else {
|
|
||||||
panic!("Unexpected request path: {}", request.url().path());
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
let result = table
|
|
||||||
.add(RecordBatchIterator::new([Ok(data.clone())], data.schema()))
|
|
||||||
.execute()
|
|
||||||
.await
|
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
assert_eq!(result.version, 2);
|
|
||||||
|
|
||||||
let body = receiver.recv().unwrap();
|
|
||||||
let body = collect_body(body).await;
|
|
||||||
let expected_body = write_ipc_stream(&data);
|
|
||||||
assert_eq!(&body, &expected_body);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[tokio::test]
|
|
||||||
async fn test_create_index_with_namespace() {
|
|
||||||
let table = Table::new_with_handler("dev$users", |request| {
|
|
||||||
match request.url().path() {
|
|
||||||
"/v1/table/dev$users/create_index/" => {
|
|
||||||
assert_eq!(request.method(), "POST");
|
|
||||||
assert_eq!(
|
|
||||||
request.headers().get("Content-Type").unwrap(),
|
|
||||||
JSON_CONTENT_TYPE
|
|
||||||
);
|
|
||||||
|
|
||||||
// Verify the request body contains the column name
|
|
||||||
if let Some(body) = request.body().unwrap().as_bytes() {
|
|
||||||
let body = std::str::from_utf8(body).unwrap();
|
|
||||||
let value: serde_json::Value = serde_json::from_str(body).unwrap();
|
|
||||||
assert_eq!(value["column"], "embedding");
|
|
||||||
assert_eq!(value["index_type"], "IVF_PQ");
|
|
||||||
}
|
|
||||||
|
|
||||||
http::Response::builder().status(200).body("").unwrap()
|
|
||||||
}
|
|
||||||
"/v1/table/dev$users/describe/" => {
|
|
||||||
// Needed for schema check in Auto index type
|
|
||||||
http::Response::builder()
|
|
||||||
.status(200)
|
|
||||||
.body(r#"{"version": 1, "schema": {"fields": [{"name": "embedding", "type": {"type": "list", "item": {"type": "float32"}}, "nullable": false}]}}"#)
|
|
||||||
.unwrap()
|
|
||||||
}
|
|
||||||
_ => {
|
|
||||||
panic!("Unexpected path: {}", request.url().path());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
table
|
|
||||||
.create_index(&["embedding"], Index::IvfPq(IvfPqIndexBuilder::default()))
|
|
||||||
.execute()
|
|
||||||
.await
|
|
||||||
.unwrap();
|
|
||||||
}
|
|
||||||
|
|
||||||
#[tokio::test]
|
|
||||||
async fn test_drop_columns_with_namespace() {
|
|
||||||
let table = Table::new_with_handler("test$schema_ops", |request| {
|
|
||||||
assert_eq!(request.method(), "POST");
|
|
||||||
assert_eq!(
|
|
||||||
request.url().path(),
|
|
||||||
"/v1/table/test$schema_ops/drop_columns/"
|
|
||||||
);
|
|
||||||
assert_eq!(
|
|
||||||
request.headers().get("Content-Type").unwrap(),
|
|
||||||
JSON_CONTENT_TYPE
|
|
||||||
);
|
|
||||||
|
|
||||||
if let Some(body) = request.body().unwrap().as_bytes() {
|
|
||||||
let body = std::str::from_utf8(body).unwrap();
|
|
||||||
let value: serde_json::Value = serde_json::from_str(body).unwrap();
|
|
||||||
let columns = value["columns"].as_array().unwrap();
|
|
||||||
assert_eq!(columns.len(), 2);
|
|
||||||
assert_eq!(columns[0], "old_col1");
|
|
||||||
assert_eq!(columns[1], "old_col2");
|
|
||||||
}
|
|
||||||
|
|
||||||
http::Response::builder()
|
|
||||||
.status(200)
|
|
||||||
.body(r#"{"version": 5}"#)
|
|
||||||
.unwrap()
|
|
||||||
});
|
|
||||||
|
|
||||||
let result = table.drop_columns(&["old_col1", "old_col2"]).await.unwrap();
|
|
||||||
assert_eq!(result.version, 5);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|||||||

@@ -28,11 +28,9 @@ use lance::dataset::{
 };
 use lance::dataset::{MergeInsertBuilder as LanceMergeInsertBuilder, WhenNotMatchedBySource};
 use lance::index::vector::utils::infer_vector_dim;
-use lance::index::vector::VectorIndexParams;
 use lance::io::WrappingObjectStore;
 use lance_datafusion::exec::{analyze_plan as lance_analyze_plan, execute_plan};
 use lance_datafusion::utils::StreamingWriteSource;
-use lance_index::scalar::{BuiltinIndexType, ScalarIndexParams};
 use lance_index::vector::hnsw::builder::HnswBuildParams;
 use lance_index::vector::ivf::IvfBuildParams;
 use lance_index::vector::pq::PQBuildParams;
@@ -52,7 +50,11 @@ use crate::arrow::IntoArrow;
 use crate::connection::NoData;
 use crate::embeddings::{EmbeddingDefinition, EmbeddingRegistry, MaybeEmbedded, MemoryRegistry};
 use crate::error::{Error, Result};
-use crate::index::vector::{suggested_num_partitions_for_hnsw, VectorIndex};
+use crate::index::scalar::FtsIndexBuilder;
+use crate::index::vector::{
+    suggested_num_partitions_for_hnsw, IvfFlatIndexBuilder, IvfHnswPqIndexBuilder,
+    IvfHnswSqIndexBuilder, IvfPqIndexBuilder, VectorIndex,
+};
 use crate::index::IndexStatistics;
 use crate::index::{
     vector::{suggested_num_partitions, suggested_num_sub_vectors},
@@ -509,10 +511,6 @@ pub trait BaseTable: std::fmt::Display + std::fmt::Debug + Send + Sync {
     fn as_any(&self) -> &dyn std::any::Any;
     /// Get the name of the table.
     fn name(&self) -> &str;
-    /// Get the namespace of the table.
-    fn namespace(&self) -> &[String];
-    /// Get the id of the table
-    fn id(&self) -> &str;
     /// Get the arrow [Schema] of the table.
     async fn schema(&self) -> Result<SchemaRef>;
     /// Count the number of rows in this table.
@@ -1700,219 +1698,345 @@ impl NativeTable {
             .collect())
     }

-    // Helper to validate index type compatibility with field data type
-    fn validate_index_type(
+    async fn create_ivf_flat_index(
+        &self,
+        index: IvfFlatIndexBuilder,
         field: &Field,
-        index_name: &str,
-        supported_fn: impl Fn(&DataType) -> bool,
+        replace: bool,
     ) -> Result<()> {
-        if !supported_fn(field.data_type()) {
-            return Err(Error::Schema {
+        if !supported_vector_data_type(field.data_type()) {
+            return Err(Error::InvalidInput {
                 message: format!(
-                    "A {} index cannot be created on the field `{}` which has data type {}",
-                    index_name,
+                    "An IVF Flat index cannot be created on the column `{}` which has data type {}",
                     field.name(),
                     field.data_type()
                 ),
             });
         }

+        let num_partitions = if let Some(n) = index.num_partitions {
+            n
+        } else {
+            suggested_num_partitions(self.count_rows(None).await?)
+        };
+        let mut dataset = self.dataset.get_mut().await?;
+        let lance_idx_params = lance::index::vector::VectorIndexParams::ivf_flat(
+            num_partitions as usize,
+            index.distance_type.into(),
+        );
+        dataset
+            .create_index(
+                &[field.name()],
+                IndexType::Vector,
+                None,
+                &lance_idx_params,
+                replace,
+            )
+            .await?;
         Ok(())
     }

-    // Helper to get num_partitions with default calculation
-    async fn get_num_partitions(
+    async fn create_ivf_pq_index(
         &self,
-        provided: Option<u32>,
-        for_hnsw: bool,
-        dim: Option<u32>,
-    ) -> Result<u32> {
-        if let Some(n) = provided {
-            Ok(n)
+        index: IvfPqIndexBuilder,
+        field: &Field,
+        replace: bool,
+    ) -> Result<()> {
+        if !supported_vector_data_type(field.data_type()) {
+            return Err(Error::InvalidInput {
+                message: format!(
+                    "An IVF PQ index cannot be created on the column `{}` which has data type {}",
+                    field.name(),
+                    field.data_type()
+                ),
+            });
+        }
+
+        let num_partitions = if let Some(n) = index.num_partitions {
+            n
         } else {
-            let row_count = self.count_rows(None).await?;
-            if for_hnsw {
-                Ok(suggested_num_partitions_for_hnsw(
-                    row_count,
-                    dim.ok_or_else(|| Error::InvalidInput {
-                        message: "Vector dimension required for HNSW partitioning".to_string(),
-                    })?,
-                ))
-            } else {
-                Ok(suggested_num_partitions(row_count))
-            }
+            suggested_num_partitions(self.count_rows(None).await?)
+        };
+        let num_sub_vectors: u32 = if let Some(n) = index.num_sub_vectors {
+            n
+        } else {
+            let dim = infer_vector_dim(field.data_type())?;
+            suggested_num_sub_vectors(dim as u32)
+        };
+        let mut dataset = self.dataset.get_mut().await?;
+        let lance_idx_params = lance::index::vector::VectorIndexParams::ivf_pq(
+            num_partitions as usize,
+            /*num_bits=*/ 8,
+            num_sub_vectors as usize,
+            index.distance_type.into(),
+            index.max_iterations as usize,
+        );
+        dataset
+            .create_index(
+                &[field.name()],
+                IndexType::Vector,
+                None,
+                &lance_idx_params,
+                replace,
+            )
+            .await?;
+        Ok(())
+    }
+
+    async fn create_ivf_hnsw_pq_index(
+        &self,
+        index: IvfHnswPqIndexBuilder,
+        field: &Field,
+        replace: bool,
+    ) -> Result<()> {
+        if !supported_vector_data_type(field.data_type()) {
+            return Err(Error::InvalidInput {
+                message: format!(
+                    "An IVF HNSW PQ index cannot be created on the column `{}` which has data type {}",
+                    field.name(),
+                    field.data_type()
+                ),
+            });
+        }
+
+        let num_partitions: u32 = if let Some(n) = index.num_partitions {
+            n
+        } else {
+            match field.data_type() {
+                arrow_schema::DataType::FixedSizeList(_, n) => Ok::<u32, Error>(
+                    suggested_num_partitions_for_hnsw(self.count_rows(None).await?, *n as u32),
+                ),
+                _ => Err(Error::Schema {
+                    message: format!("Column '{}' is not a FixedSizeList", field.name()),
+                }),
+            }?
+        };
+
+        let num_sub_vectors: u32 = if let Some(n) = index.num_sub_vectors {
+            n
+        } else {
+            match field.data_type() {
+                arrow_schema::DataType::FixedSizeList(_, n) => {
+                    Ok::<u32, Error>(suggested_num_sub_vectors(*n as u32))
+                }
+                _ => Err(Error::Schema {
+                    message: format!("Column '{}' is not a FixedSizeList", field.name()),
+                }),
+            }?
+        };
+
+        let mut dataset = self.dataset.get_mut().await?;
+        let mut ivf_params = IvfBuildParams::new(num_partitions as usize);
+        ivf_params.sample_rate = index.sample_rate as usize;
+        ivf_params.max_iters = index.max_iterations as usize;
+        let hnsw_params = HnswBuildParams::default()
+            .num_edges(index.m as usize)
+            .ef_construction(index.ef_construction as usize);
+        let pq_params = PQBuildParams {
+            num_sub_vectors: num_sub_vectors as usize,
+            ..Default::default()
+        };
+        let lance_idx_params = lance::index::vector::VectorIndexParams::with_ivf_hnsw_pq_params(
+            index.distance_type.into(),
+            ivf_params,
+            hnsw_params,
+            pq_params,
+        );
+        dataset
+            .create_index(
+                &[field.name()],
+                IndexType::Vector,
+                None,
+                &lance_idx_params,
+                replace,
+            )
+            .await?;
+        Ok(())
+    }
+
+    async fn create_ivf_hnsw_sq_index(
+        &self,
+        index: IvfHnswSqIndexBuilder,
+        field: &Field,
+        replace: bool,
+    ) -> Result<()> {
+        if !supported_vector_data_type(field.data_type()) {
+            return Err(Error::InvalidInput {
+                message: format!(
+                    "An IVF HNSW SQ index cannot be created on the column `{}` which has data type {}",
+                    field.name(),
+                    field.data_type()
+                ),
+            });
+        }
+
+        let num_partitions: u32 = if let Some(n) = index.num_partitions {
+            n
+        } else {
+            match field.data_type() {
+                arrow_schema::DataType::FixedSizeList(_, n) => Ok::<u32, Error>(
+                    suggested_num_partitions_for_hnsw(self.count_rows(None).await?, *n as u32),
+                ),
+                _ => Err(Error::Schema {
+                    message: format!("Column '{}' is not a FixedSizeList", field.name()),
+                }),
+            }?
+        };
+
+        let mut dataset = self.dataset.get_mut().await?;
+        let mut ivf_params = IvfBuildParams::new(num_partitions as usize);
+        ivf_params.sample_rate = index.sample_rate as usize;
+        ivf_params.max_iters = index.max_iterations as usize;
+        let hnsw_params = HnswBuildParams::default()
+            .num_edges(index.m as usize)
+            .ef_construction(index.ef_construction as usize);
+        let sq_params = SQBuildParams {
+            sample_rate: index.sample_rate as usize,
+            ..Default::default()
+        };
+        let lance_idx_params = lance::index::vector::VectorIndexParams::with_ivf_hnsw_sq_params(
+            index.distance_type.into(),
+            ivf_params,
+            hnsw_params,
+            sq_params,
+        );
+        dataset
+            .create_index(
+                &[field.name()],
+                IndexType::Vector,
+                None,
+                &lance_idx_params,
+                replace,
+            )
+            .await?;
+        Ok(())
+    }
+
+    async fn create_auto_index(&self, field: &Field, opts: IndexBuilder) -> Result<()> {
+        if supported_vector_data_type(field.data_type()) {
+            self.create_ivf_pq_index(IvfPqIndexBuilder::default(), field, opts.replace)
+                .await
+        } else if supported_btree_data_type(field.data_type()) {
+            self.create_btree_index(field, opts).await
+        } else {
+            Err(Error::InvalidInput {
+                message: format!(
+                    "there are no indices supported for the field `{}` with the data type {}",
+                    field.name(),
+                    field.data_type()
+                ),
+            })
         }
     }

-    // Helper to get num_sub_vectors with default calculation
-    fn get_num_sub_vectors(provided: Option<u32>, dim: u32) -> u32 {
-        provided.unwrap_or_else(|| suggested_num_sub_vectors(dim))
-    }
-
-    // Helper to extract vector dimension from field
-    fn get_vector_dimension(field: &Field) -> Result<u32> {
-        match field.data_type() {
-            arrow_schema::DataType::FixedSizeList(_, n) => Ok(*n as u32),
-            _ => Ok(infer_vector_dim(field.data_type())? as u32),
+    async fn create_btree_index(&self, field: &Field, opts: IndexBuilder) -> Result<()> {
+        if !supported_btree_data_type(field.data_type()) {
+            return Err(Error::Schema {
+                message: format!(
+                    "A BTree index cannot be created on the field `{}` which has data type {}",
+                    field.name(),
+                    field.data_type()
+                ),
+            });
         }
-    }
+
+        let mut dataset = self.dataset.get_mut().await?;
+        let lance_idx_params = lance_index::scalar::ScalarIndexParams {
+            force_index_type: Some(lance_index::scalar::ScalarIndexType::BTree),
+        };
+        dataset
+            .create_index(
+                &[field.name()],
+                IndexType::BTree,
+                None,
+                &lance_idx_params,
+                opts.replace,
+            )
+            .await?;
+        Ok(())
     }

-    // Convert LanceDB Index to Lance IndexParams
-    async fn make_index_params(
+    async fn create_bitmap_index(&self, field: &Field, opts: IndexBuilder) -> Result<()> {
+        if !supported_bitmap_data_type(field.data_type()) {
+            return Err(Error::Schema {
+                message: format!(
+                    "A Bitmap index cannot be created on the field `{}` which has data type {}",
+                    field.name(),
+                    field.data_type()
+                ),
+            });
+        }
+
+        let mut dataset = self.dataset.get_mut().await?;
+        let lance_idx_params = lance_index::scalar::ScalarIndexParams {
+            force_index_type: Some(lance_index::scalar::ScalarIndexType::Bitmap),
+        };
+        dataset
+            .create_index(
+                &[field.name()],
+                IndexType::Bitmap,
+                None,
+                &lance_idx_params,
+                opts.replace,
+            )
+            .await?;
+        Ok(())
+    }
+
+    async fn create_label_list_index(&self, field: &Field, opts: IndexBuilder) -> Result<()> {
+        if !supported_label_list_data_type(field.data_type()) {
+            return Err(Error::Schema {
+                message: format!(
+                    "A LabelList index cannot be created on the field `{}` which has data type {}",
+                    field.name(),
+                    field.data_type()
+                ),
+            });
+        }
+
+        let mut dataset = self.dataset.get_mut().await?;
+        let lance_idx_params = lance_index::scalar::ScalarIndexParams {
+            force_index_type: Some(lance_index::scalar::ScalarIndexType::LabelList),
+        };
+        dataset
+            .create_index(
+                &[field.name()],
+                IndexType::LabelList,
+                None,
+                &lance_idx_params,
+                opts.replace,
+            )
+            .await?;
+        Ok(())
+    }
+
+    async fn create_fts_index(
         &self,
         field: &Field,
-        index_opts: Index,
-    ) -> Result<Box<dyn lance::index::IndexParams>> {
-        match index_opts {
-            Index::Auto => {
-                if supported_vector_data_type(field.data_type()) {
-                    // Use IvfPq as the default for auto vector indices
-                    let dim = Self::get_vector_dimension(field)?;
-                    let num_partitions = self.get_num_partitions(None, false, None).await?;
-                    let num_sub_vectors = Self::get_num_sub_vectors(None, dim);
-                    let lance_idx_params = lance::index::vector::VectorIndexParams::ivf_pq(
-                        num_partitions as usize,
-                        /*num_bits=*/ 8,
-                        num_sub_vectors as usize,
-                        lance_linalg::distance::MetricType::L2,
-                        /*max_iterations=*/ 50,
-                    );
-                    Ok(Box::new(lance_idx_params))
-                } else if supported_btree_data_type(field.data_type()) {
-                    Ok(Box::new(ScalarIndexParams::for_builtin(
-                        BuiltinIndexType::BTree,
-                    )))
-                } else {
-                    return Err(Error::InvalidInput {
-                        message: format!(
-                            "there are no indices supported for the field `{}` with the data type {}",
-                            field.name(),
-                            field.data_type()
-                        ),
-                    });
-                }
-            }
-            Index::BTree(_) => {
-                Self::validate_index_type(field, "BTree", supported_btree_data_type)?;
-                Ok(Box::new(ScalarIndexParams::for_builtin(
-                    BuiltinIndexType::BTree,
-                )))
-            }
-            Index::Bitmap(_) => {
-                Self::validate_index_type(field, "Bitmap", supported_bitmap_data_type)?;
-                Ok(Box::new(ScalarIndexParams::for_builtin(
-                    BuiltinIndexType::Bitmap,
-                )))
-            }
-            Index::LabelList(_) => {
-                Self::validate_index_type(field, "LabelList", supported_label_list_data_type)?;
-                Ok(Box::new(ScalarIndexParams::for_builtin(
-                    BuiltinIndexType::LabelList,
-                )))
-            }
-            Index::FTS(fts_opts) => {
-                Self::validate_index_type(field, "FTS", supported_fts_data_type)?;
-                Ok(Box::new(fts_opts))
-            }
-            Index::IvfFlat(index) => {
-                Self::validate_index_type(field, "IVF Flat", supported_vector_data_type)?;
-                let num_partitions = self
-                    .get_num_partitions(index.num_partitions, false, None)
-                    .await?;
-                let lance_idx_params = VectorIndexParams::ivf_flat(
-                    num_partitions as usize,
-                    index.distance_type.into(),
-                );
-                Ok(Box::new(lance_idx_params))
-            }
-            Index::IvfPq(index) => {
-                Self::validate_index_type(field, "IVF PQ", supported_vector_data_type)?;
-                let dim = Self::get_vector_dimension(field)?;
-                let num_partitions = self
-                    .get_num_partitions(index.num_partitions, false, None)
-                    .await?;
-                let num_sub_vectors = Self::get_num_sub_vectors(index.num_sub_vectors, dim);
-                let lance_idx_params = VectorIndexParams::ivf_pq(
-                    num_partitions as usize,
-                    /*num_bits=*/ 8,
-                    num_sub_vectors as usize,
-                    index.distance_type.into(),
-                    index.max_iterations as usize,
-                );
-                Ok(Box::new(lance_idx_params))
-            }
-            Index::IvfHnswPq(index) => {
-                Self::validate_index_type(field, "IVF HNSW PQ", supported_vector_data_type)?;
-                let dim = Self::get_vector_dimension(field)?;
-                let num_partitions = self
-                    .get_num_partitions(index.num_partitions, true, Some(dim))
-                    .await?;
-                let num_sub_vectors = Self::get_num_sub_vectors(index.num_sub_vectors, dim);
-                let mut ivf_params = IvfBuildParams::new(num_partitions as usize);
-                ivf_params.sample_rate = index.sample_rate as usize;
-                ivf_params.max_iters = index.max_iterations as usize;
-                let hnsw_params = HnswBuildParams::default()
-                    .num_edges(index.m as usize)
-                    .ef_construction(index.ef_construction as usize);
-                let pq_params = PQBuildParams {
-                    num_sub_vectors: num_sub_vectors as usize,
-                    ..Default::default()
-                };
-                let lance_idx_params = VectorIndexParams::with_ivf_hnsw_pq_params(
-                    index.distance_type.into(),
-                    ivf_params,
-                    hnsw_params,
-                    pq_params,
-                );
-                Ok(Box::new(lance_idx_params))
-            }
-            Index::IvfHnswSq(index) => {
-                Self::validate_index_type(field, "IVF HNSW SQ", supported_vector_data_type)?;
-                let dim = Self::get_vector_dimension(field)?;
-                let num_partitions = self
-                    .get_num_partitions(index.num_partitions, true, Some(dim))
-                    .await?;
-                let mut ivf_params = IvfBuildParams::new(num_partitions as usize);
-                ivf_params.sample_rate = index.sample_rate as usize;
-                ivf_params.max_iters = index.max_iterations as usize;
-                let hnsw_params = HnswBuildParams::default()
-                    .num_edges(index.m as usize)
-                    .ef_construction(index.ef_construction as usize);
-                let sq_params = SQBuildParams {
-                    sample_rate: index.sample_rate as usize,
-                    ..Default::default()
-                };
-                let lance_idx_params = VectorIndexParams::with_ivf_hnsw_sq_params(
-                    index.distance_type.into(),
-                    ivf_params,
-                    hnsw_params,
-                    sq_params,
-                );
-                Ok(Box::new(lance_idx_params))
-            }
+        fts_opts: FtsIndexBuilder,
+        replace: bool,
+    ) -> Result<()> {
+        if !supported_fts_data_type(field.data_type()) {
+            return Err(Error::Schema {
+                message: format!(
+                    "A FTS index cannot be created on the field `{}` which has data type {}",
+                    field.name(),
+                    field.data_type()
+                ),
+            });
         }
-    }

-    // Helper method to get the correct IndexType based on the Index variant and field data type
-    fn get_index_type_for_field(&self, field: &Field, index: &Index) -> IndexType {
-        match index {
-            Index::Auto => {
-                if supported_vector_data_type(field.data_type()) {
-                    IndexType::Vector
-                } else if supported_btree_data_type(field.data_type()) {
-                    IndexType::BTree
-                } else {
-                    // This should not happen since make_index_params would have failed
-                    IndexType::BTree
-                }
-            }
-            Index::BTree(_) => IndexType::BTree,
-            Index::Bitmap(_) => IndexType::Bitmap,
-            Index::LabelList(_) => IndexType::LabelList,
-            Index::FTS(_) => IndexType::Inverted,
-            Index::IvfFlat(_) | Index::IvfPq(_) | Index::IvfHnswPq(_) | Index::IvfHnswSq(_) => {
-                IndexType::Vector
-            }
-        }
+        let mut dataset = self.dataset.get_mut().await?;
+        dataset
+            .create_index(
+                &[field.name()],
+                IndexType::Inverted,
+                None,
+                &fts_opts,
+                replace,
+            )
+            .await?;
+        Ok(())
     }

     async fn generic_query(
@@ -2019,16 +2143,6 @@ impl BaseTable for NativeTable {
         self.name.as_str()
     }

-    fn namespace(&self) -> &[String] {
-        // Native tables don't support namespaces yet, return empty slice for root namespace
-        &[]
-    }
-
-    fn id(&self) -> &str {
-        // For native tables, id is same as name since no namespace support
-        self.name.as_str()
-    }
-
     async fn version(&self) -> Result<u64> {
         Ok(self.dataset.get().await?.version().version)
     }
@@ -2137,20 +2251,26 @@ impl BaseTable for NativeTable {

         let field = schema.field_with_name(&opts.columns[0])?;

-        let lance_idx_params = self.make_index_params(field, opts.index.clone()).await?;
-        let index_type = self.get_index_type_for_field(field, &opts.index);
-        let columns = [field.name().as_str()];
-        let mut dataset = self.dataset.get_mut().await?;
-        let mut builder = dataset
-            .create_index_builder(&columns, index_type, lance_idx_params.as_ref())
-            .train(opts.train)
-            .replace(opts.replace);
-
-        if let Some(name) = opts.name {
-            builder = builder.name(name);
+        match opts.index {
+            Index::Auto => self.create_auto_index(field, opts).await,
+            Index::BTree(_) => self.create_btree_index(field, opts).await,
+            Index::Bitmap(_) => self.create_bitmap_index(field, opts).await,
+            Index::LabelList(_) => self.create_label_list_index(field, opts).await,
+            Index::FTS(fts_opts) => self.create_fts_index(field, fts_opts, opts.replace).await,
+            Index::IvfFlat(ivf_flat) => {
+                self.create_ivf_flat_index(ivf_flat, field, opts.replace)
+                    .await
+            }
+            Index::IvfPq(ivf_pq) => self.create_ivf_pq_index(ivf_pq, field, opts.replace).await,
+            Index::IvfHnswPq(ivf_hnsw_pq) => {
+                self.create_ivf_hnsw_pq_index(ivf_hnsw_pq, field, opts.replace)
+                    .await
+            }
+            Index::IvfHnswSq(ivf_hnsw_sq) => {
+                self.create_ivf_hnsw_sq_index(ivf_hnsw_sq, field, opts.replace)
+                    .await
+            }
         }
-        builder.await?;
-        Ok(())
     }

     async fn drop_index(&self, index_name: &str) -> Result<()> {
@@ -2770,7 +2890,6 @@ mod tests {
     use crate::connect;
     use crate::connection::ConnectBuilder;
     use crate::index::scalar::{BTreeIndexBuilder, BitmapIndexBuilder};
-    use crate::index::vector::{IvfHnswPqIndexBuilder, IvfHnswSqIndexBuilder};
     use crate::query::{ExecutableQuery, QueryBase};

     #[tokio::test]
@@ -3272,7 +3391,6 @@ mod tests {
         fn wrap(
             &self,
             original: Arc<dyn object_store::ObjectStore>,
-            _storage_options: Option<&std::collections::HashMap<String, String>>,
         ) -> Arc<dyn object_store::ObjectStore> {
             self.called.store(true, Ordering::Relaxed);
             original

@@ -121,10 +121,6 @@ impl ExecutionPlan for MetadataEraserExec {
                 as SendableRecordBatchStream,
         )
     }
-
-    fn partition_statistics(&self, partition: Option<usize>) -> DataFusionResult<Statistics> {
-        self.input.partition_statistics(partition)
-    }
 }

 #[derive(Debug)]
@@ -231,7 +227,6 @@ pub mod tests {
         prelude::{SessionConfig, SessionContext},
     };
     use datafusion_catalog::TableProvider;
-    use datafusion_common::stats::Precision;
     use datafusion_execution::SendableRecordBatchStream;
     use datafusion_expr::{col, lit, LogicalPlan, LogicalPlanBuilder};
     use futures::{StreamExt, TryStreamExt};
@@ -500,7 +495,6 @@ pub mod tests {
             plan,
             "MetadataEraserExec
   ProjectionExec:...
-  CooperativeExec...
     LanceRead:...",
         )
         .await;
@@ -515,24 +509,4 @@ pub mod tests {

         TestFixture::check_plan(plan, "").await;
     }
-
-    #[tokio::test]
-    async fn test_metadata_eraser_propagates_statistics() {
-        let fixture = TestFixture::new().await;
-
-        let plan =
-            LogicalPlanBuilder::scan("foo", provider_as_source(fixture.adapter.clone()), None)
-                .unwrap()
-                .build()
-                .unwrap();
-
-        let ctx = SessionContext::new();
-        let physical_plan = ctx.state().create_physical_plan(&plan).await.unwrap();
-
-        assert_eq!(physical_plan.name(), "MetadataEraserExec");
-
-        let partition_stats = physical_plan.partition_statistics(None).unwrap();
-
-        assert!(matches!(partition_stats.num_rows, Precision::Exact(10)));
-    }
 }

@@ -130,7 +130,7 @@ async fn test_minio_lifecycle() -> Result<()> {
     let data = RecordBatchIterator::new(vec![Ok(data.clone())], data.schema());
     table.add(data).execute().await?;

-    db.drop_table("test_table", &[]).await?;
+    db.drop_table("test_table").await?;

     Ok(())
 }