Compare commits

..

1 Commits

Author SHA1 Message Date
Lance Release
9c2e21ec33 Bump version: 0.22.2-beta.1 → 0.22.2-beta.2 2025-10-06 18:10:02 +00:00
17 changed files with 354 additions and 729 deletions

667
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -15,30 +15,30 @@ categories = ["database-implementations"]
rust-version = "1.78.0" rust-version = "1.78.0"
[workspace.dependencies] [workspace.dependencies]
lance = { "version" = "=0.38.2", default-features = false, "features" = ["dynamodb"] } lance = { "version" = "=0.38.0", default-features = false, "features" = ["dynamodb"] }
lance-io = { "version" = "=0.38.2", default-features = false } lance-io = { "version" = "=0.38.0", default-features = false }
lance-index = "=0.38.2" lance-index = "=0.38.0"
lance-linalg = "=0.38.2" lance-linalg = "=0.38.0"
lance-table = "=0.38.2" lance-table = "=0.38.0"
lance-testing = "=0.38.2" lance-testing = "=0.38.0"
lance-datafusion = "=0.38.2" lance-datafusion = "=0.38.0"
lance-encoding = "=0.38.2" lance-encoding = "=0.38.0"
lance-namespace = "0.0.18" lance-namespace = "0.0.16"
# Note that this one does not include pyarrow # Note that this one does not include pyarrow
arrow = { version = "56.2", optional = false } arrow = { version = "55.1", optional = false }
arrow-array = "56.2" arrow-array = "55.1"
arrow-data = "56.2" arrow-data = "55.1"
arrow-ipc = "56.2" arrow-ipc = "55.1"
arrow-ord = "56.2" arrow-ord = "55.1"
arrow-schema = "56.2" arrow-schema = "55.1"
arrow-cast = "56.2" arrow-cast = "55.1"
async-trait = "0" async-trait = "0"
datafusion = { version = "50.1", default-features = false } datafusion = { version = "49.0", default-features = false }
datafusion-catalog = "50.1" datafusion-catalog = "49.0"
datafusion-common = { version = "50.1", default-features = false } datafusion-common = { version = "49.0", default-features = false }
datafusion-execution = "50.1" datafusion-execution = "49.0"
datafusion-expr = "50.1" datafusion-expr = "49.0"
datafusion-physical-plan = "50.1" datafusion-physical-plan = "49.0"
env_logger = "0.11" env_logger = "0.11"
half = { "version" = "2.6.0", default-features = false, features = [ half = { "version" = "2.6.0", default-features = false, features = [
"num-traits", "num-traits",

View File

@@ -84,7 +84,6 @@ plugins:
'examples.md': 'https://lancedb.com/docs/tutorials/' 'examples.md': 'https://lancedb.com/docs/tutorials/'
'concepts/vector_search.md': 'https://lancedb.com/docs/search/vector-search/' 'concepts/vector_search.md': 'https://lancedb.com/docs/search/vector-search/'
'troubleshooting.md': 'https://lancedb.com/docs/troubleshooting/' 'troubleshooting.md': 'https://lancedb.com/docs/troubleshooting/'
'guides/storage.md': 'https://lancedb.com/docs/storage/integrations'

View File

@@ -1,184 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
import * as arrow from "../lancedb/arrow";
import { sanitizeField, sanitizeType } from "../lancedb/sanitize";
describe("sanitize", function () {
describe("sanitizeType function", function () {
it("should handle type objects", function () {
const type = new arrow.Int32();
const result = sanitizeType(type);
expect(result.typeId).toBe(arrow.Type.Int);
expect((result as arrow.Int).bitWidth).toBe(32);
expect((result as arrow.Int).isSigned).toBe(true);
const floatType = {
typeId: 3, // Type.Float = 3
precision: 2,
toString: () => "Float",
isFloat: true,
isFixedWidth: true,
};
const floatResult = sanitizeType(floatType);
expect(floatResult).toBeInstanceOf(arrow.DataType);
expect(floatResult.typeId).toBe(arrow.Type.Float);
const floatResult2 = sanitizeType({ ...floatType, typeId: () => 3 });
expect(floatResult2).toBeInstanceOf(arrow.DataType);
expect(floatResult2.typeId).toBe(arrow.Type.Float);
});
const allTypeNameTestCases = [
["null", new arrow.Null()],
["binary", new arrow.Binary()],
["utf8", new arrow.Utf8()],
["bool", new arrow.Bool()],
["int8", new arrow.Int8()],
["int16", new arrow.Int16()],
["int32", new arrow.Int32()],
["int64", new arrow.Int64()],
["uint8", new arrow.Uint8()],
["uint16", new arrow.Uint16()],
["uint32", new arrow.Uint32()],
["uint64", new arrow.Uint64()],
["float16", new arrow.Float16()],
["float32", new arrow.Float32()],
["float64", new arrow.Float64()],
["datemillisecond", new arrow.DateMillisecond()],
["dateday", new arrow.DateDay()],
["timenanosecond", new arrow.TimeNanosecond()],
["timemicrosecond", new arrow.TimeMicrosecond()],
["timemillisecond", new arrow.TimeMillisecond()],
["timesecond", new arrow.TimeSecond()],
["intervaldaytime", new arrow.IntervalDayTime()],
["intervalyearmonth", new arrow.IntervalYearMonth()],
["durationnanosecond", new arrow.DurationNanosecond()],
["durationmicrosecond", new arrow.DurationMicrosecond()],
["durationmillisecond", new arrow.DurationMillisecond()],
["durationsecond", new arrow.DurationSecond()],
] as const;
it.each(allTypeNameTestCases)(
'should map type name "%s" to %s',
function (name, expected) {
const result = sanitizeType(name);
expect(result).toBeInstanceOf(expected.constructor);
},
);
const caseVariationTestCases = [
["NULL", new arrow.Null()],
["Utf8", new arrow.Utf8()],
["FLOAT32", new arrow.Float32()],
["DaTedAy", new arrow.DateDay()],
] as const;
it.each(caseVariationTestCases)(
'should be case insensitive for type name "%s" mapped to %s',
function (name, expected) {
const result = sanitizeType(name);
expect(result).toBeInstanceOf(expected.constructor);
},
);
it("should throw error for unrecognized type name", function () {
expect(() => sanitizeType("invalid_type")).toThrow(
"Unrecognized type name in schema: invalid_type",
);
});
});
describe("sanitizeField function", function () {
it("should handle field with string type name", function () {
const field = sanitizeField({
name: "string_field",
type: "utf8",
nullable: true,
metadata: new Map([["key", "value"]]),
});
expect(field).toBeInstanceOf(arrow.Field);
expect(field.name).toBe("string_field");
expect(field.type).toBeInstanceOf(arrow.Utf8);
expect(field.nullable).toBe(true);
expect(field.metadata?.get("key")).toBe("value");
});
it("should handle field with type object", function () {
const floatType = {
typeId: 3, // Float
precision: 32,
};
const field = sanitizeField({
name: "float_field",
type: floatType,
nullable: false,
});
expect(field).toBeInstanceOf(arrow.Field);
expect(field.name).toBe("float_field");
expect(field.type).toBeInstanceOf(arrow.DataType);
expect(field.type.typeId).toBe(arrow.Type.Float);
expect((field.type as arrow.Float64).precision).toBe(32);
expect(field.nullable).toBe(false);
});
it("should handle field with direct Type instance", function () {
const field = sanitizeField({
name: "bool_field",
type: new arrow.Bool(),
nullable: true,
});
expect(field).toBeInstanceOf(arrow.Field);
expect(field.name).toBe("bool_field");
expect(field.type).toBeInstanceOf(arrow.Bool);
expect(field.nullable).toBe(true);
});
it("should throw error for invalid field object", function () {
expect(() =>
sanitizeField({
type: "int32",
nullable: true,
}),
).toThrow(
"The field passed in is missing a `type`/`name`/`nullable` property",
);
// Invalid type
expect(() =>
sanitizeField({
name: "invalid",
type: { invalid: true },
nullable: true,
}),
).toThrow("Expected a Type to have a typeId property");
// Invalid nullable
expect(() =>
sanitizeField({
name: "invalid_nullable",
type: "int32",
nullable: "not a boolean",
}),
).toThrow("The field passed in had a non-boolean `nullable` property");
});
it("should report error for invalid type name", function () {
expect(() =>
sanitizeField({
name: "invalid_field",
type: "invalid_type",
nullable: true,
}),
).toThrow(
"Unable to sanitize type for field: invalid_field due to error: Error: Unrecognized type name in schema: invalid_type",
);
});
});
});

View File

@@ -10,13 +10,7 @@ import * as arrow16 from "apache-arrow-16";
import * as arrow17 from "apache-arrow-17"; import * as arrow17 from "apache-arrow-17";
import * as arrow18 from "apache-arrow-18"; import * as arrow18 from "apache-arrow-18";
import { import { MatchQuery, PhraseQuery, Table, connect } from "../lancedb";
Connection,
MatchQuery,
PhraseQuery,
Table,
connect,
} from "../lancedb";
import { import {
Table as ArrowTable, Table as ArrowTable,
Field, Field,
@@ -27,8 +21,6 @@ import {
Int64, Int64,
List, List,
Schema, Schema,
SchemaLike,
Type,
Uint8, Uint8,
Utf8, Utf8,
makeArrowTable, makeArrowTable,
@@ -2027,52 +2019,3 @@ describe("column name options", () => {
expect(results2.length).toBe(10); expect(results2.length).toBe(10);
}); });
}); });
describe("when creating an empty table", () => {
let con: Connection;
beforeEach(async () => {
const tmpDir = tmp.dirSync({ unsafeCleanup: true });
con = await connect(tmpDir.name);
});
afterEach(() => {
con.close();
});
it("can create an empty table from an arrow Schema", async () => {
const schema = new Schema([
new Field("id", new Int64()),
new Field("vector", new Float64()),
]);
const table = await con.createEmptyTable("test", schema);
const actualSchema = await table.schema();
expect(actualSchema.fields[0].type.typeId).toBe(Type.Int);
expect((actualSchema.fields[0].type as Int64).bitWidth).toBe(64);
expect(actualSchema.fields[1].type.typeId).toBe(Type.Float);
expect((actualSchema.fields[1].type as Float64).precision).toBe(2);
});
it("can create an empty table from schema that specifies field types by name", async () => {
const schemaLike = {
fields: [
{
name: "id",
type: "int64",
nullable: true,
},
{
name: "vector",
type: "float64",
nullable: true,
},
],
metadata: new Map(),
names: ["id", "vector"],
} satisfies SchemaLike;
const table = await con.createEmptyTable("test", schemaLike);
const actualSchema = await table.schema();
expect(actualSchema.fields[0].type.typeId).toBe(Type.Int);
expect((actualSchema.fields[0].type as Int64).bitWidth).toBe(64);
expect(actualSchema.fields[1].type.typeId).toBe(Type.Float);
expect((actualSchema.fields[1].type as Float64).precision).toBe(2);
});
});

View File

@@ -73,7 +73,7 @@ export type FieldLike =
| { | {
type: string; type: string;
name: string; name: string;
nullable: boolean; nullable?: boolean;
metadata?: Map<string, string>; metadata?: Map<string, string>;
}; };

View File

@@ -326,9 +326,6 @@ export function sanitizeDictionary(typeLike: object) {
// biome-ignore lint/suspicious/noExplicitAny: skip // biome-ignore lint/suspicious/noExplicitAny: skip
export function sanitizeType(typeLike: unknown): DataType<any> { export function sanitizeType(typeLike: unknown): DataType<any> {
if (typeof typeLike === "string") {
return dataTypeFromName(typeLike);
}
if (typeof typeLike !== "object" || typeLike === null) { if (typeof typeLike !== "object" || typeLike === null) {
throw Error("Expected a Type but object was null/undefined"); throw Error("Expected a Type but object was null/undefined");
} }
@@ -450,7 +447,7 @@ export function sanitizeType(typeLike: unknown): DataType<any> {
case Type.DurationSecond: case Type.DurationSecond:
return new DurationSecond(); return new DurationSecond();
default: default:
throw new Error("Unrecognized type id in schema: " + typeId); throw new Error("Unrecoginized type id in schema: " + typeId);
} }
} }
@@ -470,15 +467,7 @@ export function sanitizeField(fieldLike: unknown): Field {
"The field passed in is missing a `type`/`name`/`nullable` property", "The field passed in is missing a `type`/`name`/`nullable` property",
); );
} }
let type: DataType; const type = sanitizeType(fieldLike.type);
try {
type = sanitizeType(fieldLike.type);
} catch (error: unknown) {
throw Error(
`Unable to sanitize type for field: ${fieldLike.name} due to error: ${error}`,
{ cause: error },
);
}
const name = fieldLike.name; const name = fieldLike.name;
if (!(typeof name === "string")) { if (!(typeof name === "string")) {
throw Error("The field passed in had a non-string `name` property"); throw Error("The field passed in had a non-string `name` property");
@@ -592,46 +581,3 @@ function sanitizeData(
}, },
); );
} }
const constructorsByTypeName = {
null: () => new Null(),
binary: () => new Binary(),
utf8: () => new Utf8(),
bool: () => new Bool(),
int8: () => new Int8(),
int16: () => new Int16(),
int32: () => new Int32(),
int64: () => new Int64(),
uint8: () => new Uint8(),
uint16: () => new Uint16(),
uint32: () => new Uint32(),
uint64: () => new Uint64(),
float16: () => new Float16(),
float32: () => new Float32(),
float64: () => new Float64(),
datemillisecond: () => new DateMillisecond(),
dateday: () => new DateDay(),
timenanosecond: () => new TimeNanosecond(),
timemicrosecond: () => new TimeMicrosecond(),
timemillisecond: () => new TimeMillisecond(),
timesecond: () => new TimeSecond(),
intervaldaytime: () => new IntervalDayTime(),
intervalyearmonth: () => new IntervalYearMonth(),
durationnanosecond: () => new DurationNanosecond(),
durationmicrosecond: () => new DurationMicrosecond(),
durationmillisecond: () => new DurationMillisecond(),
durationsecond: () => new DurationSecond(),
} as const;
type MappableTypeName = keyof typeof constructorsByTypeName;
export function dataTypeFromName(typeName: string): DataType {
const normalizedTypeName = typeName.toLowerCase() as MappableTypeName;
const _constructor = constructorsByTypeName[normalizedTypeName];
if (!_constructor) {
throw new Error("Unrecognized type name in schema: " + typeName);
}
return _constructor();
}

View File

@@ -1,12 +1,12 @@
{ {
"name": "@lancedb/lancedb", "name": "@lancedb/lancedb",
"version": "0.22.2-beta.2", "version": "0.22.2-beta.1",
"lockfileVersion": 3, "lockfileVersion": 3,
"requires": true, "requires": true,
"packages": { "packages": {
"": { "": {
"name": "@lancedb/lancedb", "name": "@lancedb/lancedb",
"version": "0.22.2-beta.2", "version": "0.22.2-beta.1",
"cpu": [ "cpu": [
"x64", "x64",
"arm64" "arm64"

View File

@@ -1,5 +1,5 @@
[tool.bumpversion] [tool.bumpversion]
current_version = "0.25.2" current_version = "0.25.2-beta.2"
parse = """(?x) parse = """(?x)
(?P<major>0|[1-9]\\d*)\\. (?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\. (?P<minor>0|[1-9]\\d*)\\.

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "lancedb-python" name = "lancedb-python"
version = "0.25.2" version = "0.25.2-beta.2"
edition.workspace = true edition.workspace = true
description = "Python bindings for LanceDB" description = "Python bindings for LanceDB"
license.workspace = true license.workspace = true
@@ -14,12 +14,12 @@ name = "_lancedb"
crate-type = ["cdylib"] crate-type = ["cdylib"]
[dependencies] [dependencies]
arrow = { version = "56.2", features = ["pyarrow"] } arrow = { version = "55.1", features = ["pyarrow"] }
async-trait = "0.1" async-trait = "0.1"
lancedb = { path = "../rust/lancedb", default-features = false } lancedb = { path = "../rust/lancedb", default-features = false }
env_logger.workspace = true env_logger.workspace = true
pyo3 = { version = "0.25", features = ["extension-module", "abi3-py39"] } pyo3 = { version = "0.24", features = ["extension-module", "abi3-py39"] }
pyo3-async-runtimes = { version = "0.25", features = [ pyo3-async-runtimes = { version = "0.24", features = [
"attributes", "attributes",
"tokio-runtime", "tokio-runtime",
] } ] }
@@ -28,7 +28,7 @@ futures.workspace = true
tokio = { version = "1.40", features = ["sync"] } tokio = { version = "1.40", features = ["sync"] }
[build-dependencies] [build-dependencies]
pyo3-build-config = { version = "0.25", features = [ pyo3-build-config = { version = "0.24", features = [
"extension-module", "extension-module",
"abi3-py39", "abi3-py39",
] } ] }

View File

@@ -5,7 +5,7 @@ dynamic = ["version"]
dependencies = [ dependencies = [
"deprecation", "deprecation",
"numpy", "numpy",
"overrides>=0.7; python_version<'3.12'", "overrides>=0.7",
"packaging", "packaging",
"pyarrow>=16", "pyarrow>=16",
"pydantic>=1.10", "pydantic>=1.10",

View File

@@ -133,7 +133,6 @@ class Tags:
async def update(self, tag: str, version: int): ... async def update(self, tag: str, version: int): ...
class IndexConfig: class IndexConfig:
name: str
index_type: str index_type: str
columns: List[str] columns: List[str]

View File

@@ -6,18 +6,10 @@ from __future__ import annotations
from abc import abstractmethod from abc import abstractmethod
from pathlib import Path from pathlib import Path
import sys
from typing import TYPE_CHECKING, Dict, Iterable, List, Literal, Optional, Union from typing import TYPE_CHECKING, Dict, Iterable, List, Literal, Optional, Union
if sys.version_info >= (3, 12):
from typing import override
class EnforceOverrides:
pass
else:
from overrides import EnforceOverrides, override # type: ignore
from lancedb.embeddings.registry import EmbeddingFunctionRegistry from lancedb.embeddings.registry import EmbeddingFunctionRegistry
from overrides import EnforceOverrides, override # type: ignore
from lancedb.common import data_to_reader, sanitize_uri, validate_schema from lancedb.common import data_to_reader, sanitize_uri, validate_schema
from lancedb.background_loop import LOOP from lancedb.background_loop import LOOP

View File

@@ -12,18 +12,13 @@ from __future__ import annotations
from typing import Dict, Iterable, List, Optional, Union from typing import Dict, Iterable, List, Optional, Union
import os import os
import sys
if sys.version_info >= (3, 12):
from typing import override
else:
from overrides import override
from lancedb.db import DBConnection from lancedb.db import DBConnection
from lancedb.table import LanceTable, Table from lancedb.table import LanceTable, Table
from lancedb.util import validate_table_name from lancedb.util import validate_table_name
from lancedb.common import validate_schema from lancedb.common import validate_schema
from lancedb.table import sanitize_create_table from lancedb.table import sanitize_create_table
from overrides import override
from lance_namespace import LanceNamespace, connect as namespace_connect from lance_namespace import LanceNamespace, connect as namespace_connect
from lance_namespace_urllib3_client.models import ( from lance_namespace_urllib3_client.models import (

View File

@@ -5,20 +5,15 @@
from datetime import timedelta from datetime import timedelta
import logging import logging
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
import sys
from typing import Any, Dict, Iterable, List, Optional, Union from typing import Any, Dict, Iterable, List, Optional, Union
from urllib.parse import urlparse from urllib.parse import urlparse
import warnings import warnings
if sys.version_info >= (3, 12):
from typing import override
else:
from overrides import override
# Remove this import to fix circular dependency # Remove this import to fix circular dependency
# from lancedb import connect_async # from lancedb import connect_async
from lancedb.remote import ClientConfig from lancedb.remote import ClientConfig
import pyarrow as pa import pyarrow as pa
from overrides import override
from ..common import DATA from ..common import DATA
from ..db import DBConnection, LOOP from ..db import DBConnection, LOOP

View File

@@ -114,7 +114,7 @@ class RemoteTable(Table):
index_type: Literal["BTREE", "BITMAP", "LABEL_LIST", "scalar"] = "scalar", index_type: Literal["BTREE", "BITMAP", "LABEL_LIST", "scalar"] = "scalar",
*, *,
replace: bool = False, replace: bool = False,
wait_timeout: Optional[timedelta] = None, wait_timeout: timedelta = None,
name: Optional[str] = None, name: Optional[str] = None,
): ):
"""Creates a scalar index """Creates a scalar index
@@ -153,7 +153,7 @@ class RemoteTable(Table):
column: str, column: str,
*, *,
replace: bool = False, replace: bool = False,
wait_timeout: Optional[timedelta] = None, wait_timeout: timedelta = None,
with_position: bool = False, with_position: bool = False,
# tokenizer configs: # tokenizer configs:
base_tokenizer: str = "simple", base_tokenizer: str = "simple",

View File

@@ -261,7 +261,7 @@ impl Database for LanceNamespaceDatabase {
return listing_db return listing_db
.open_table(OpenTableRequest { .open_table(OpenTableRequest {
name: request.name.clone(), name: request.name.clone(),
namespace: vec![], namespace: request.namespace.clone(),
index_cache_size: None, index_cache_size: None,
lance_read_params: None, lance_read_params: None,
}) })
@@ -305,14 +305,7 @@ impl Database for LanceNamespaceDatabase {
) )
.await?; .await?;
let create_request = DbCreateTableRequest { listing_db.create_table(request).await
name: request.name,
namespace: vec![],
data: request.data,
mode: request.mode,
write_options: request.write_options,
};
listing_db.create_table(create_request).await
} }
async fn open_table(&self, request: OpenTableRequest) -> Result<Arc<dyn BaseTable>> { async fn open_table(&self, request: OpenTableRequest) -> Result<Arc<dyn BaseTable>> {
@@ -339,13 +332,7 @@ impl Database for LanceNamespaceDatabase {
.create_listing_database(&request.name, &location, response.storage_options) .create_listing_database(&request.name, &location, response.storage_options)
.await?; .await?;
let open_request = OpenTableRequest { listing_db.open_table(request).await
name: request.name.clone(),
namespace: vec![],
index_cache_size: request.index_cache_size,
lance_read_params: request.lance_read_params,
};
listing_db.open_table(open_request).await
} }
async fn clone_table(&self, _request: CloneTableRequest) -> Result<Arc<dyn BaseTable>> { async fn clone_table(&self, _request: CloneTableRequest) -> Result<Arc<dyn BaseTable>> {