Compare commits

..

1 Commits

Author SHA1 Message Date
Lance Release
516478ec3d Bump version: 0.22.2-beta.0 → 0.22.2-beta.1 2025-09-30 19:31:11 +00:00
48 changed files with 464 additions and 1033 deletions

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.22.2-beta.2"
current_version = "0.22.2-beta.1"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

View File

@@ -1,45 +0,0 @@
name: Create Failure Issue
description: Creates a GitHub issue if any jobs in the workflow failed
inputs:
job-results:
description: 'JSON string of job results from needs context'
required: true
workflow-name:
description: 'Name of the workflow'
required: true
runs:
using: composite
steps:
- name: Check for failures and create issue
shell: bash
env:
JOB_RESULTS: ${{ inputs.job-results }}
WORKFLOW_NAME: ${{ inputs.workflow-name }}
RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
GH_TOKEN: ${{ github.token }}
run: |
# Check if any job failed
if echo "$JOB_RESULTS" | jq -e 'to_entries | any(.value.result == "failure")' > /dev/null; then
echo "Detected job failures, creating issue..."
# Extract failed job names
FAILED_JOBS=$(echo "$JOB_RESULTS" | jq -r 'to_entries | map(select(.value.result == "failure")) | map(.key) | join(", ")')
# Create issue with workflow name, failed jobs, and run URL
gh issue create \
--title "$WORKFLOW_NAME Failed ($FAILED_JOBS)" \
--body "The workflow **$WORKFLOW_NAME** failed during execution.
**Failed jobs:** $FAILED_JOBS
**Run URL:** $RUN_URL
Please investigate the failed jobs and address any issues." \
--label "ci"
echo "Issue created successfully"
else
echo "No job failures detected, skipping issue creation"
fi

View File

@@ -38,17 +38,3 @@ jobs:
- name: Publish the package
run: |
cargo publish -p lancedb --all-features --token ${{ steps.auth.outputs.token }}
report-failure:
name: Report Workflow Failure
runs-on: ubuntu-latest
needs: [build]
if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
permissions:
contents: read
issues: write
steps:
- uses: actions/checkout@v4
- uses: ./.github/actions/create-failure-issue
with:
job-results: ${{ toJSON(needs) }}
workflow-name: ${{ github.workflow }}

View File

@@ -43,6 +43,7 @@ jobs:
- uses: Swatinem/rust-cache@v2
- uses: actions-rust-lang/setup-rust-toolchain@v1
with:
toolchain: "1.81.0"
cache-workspaces: "./java/core/lancedb-jni"
# Disable full debug symbol generation to speed up CI build and keep memory down
# "1" means line tables only, which is useful for panic tracebacks.
@@ -111,17 +112,3 @@ jobs:
env:
SONATYPE_USER: ${{ secrets.SONATYPE_USER }}
SONATYPE_TOKEN: ${{ secrets.SONATYPE_TOKEN }}
report-failure:
name: Report Workflow Failure
runs-on: ubuntu-latest
needs: [linux-arm64, linux-x86, macos-arm64]
if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
permissions:
contents: read
issues: write
steps:
- uses: actions/checkout@v4
- uses: ./.github/actions/create-failure-issue
with:
job-results: ${{ toJSON(needs) }}
workflow-name: ${{ github.workflow }}

View File

@@ -6,7 +6,6 @@ on:
- main
pull_request:
paths:
- Cargo.toml
- nodejs/**
- .github/workflows/nodejs.yml
- docker-compose.yml

View File

@@ -365,17 +365,3 @@ jobs:
ARGS="$ARGS --tag preview"
fi
npm publish $ARGS
report-failure:
name: Report Workflow Failure
runs-on: ubuntu-latest
needs: [build-lancedb, test-lancedb, publish]
if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
permissions:
contents: read
issues: write
steps:
- uses: actions/checkout@v4
- uses: ./.github/actions/create-failure-issue
with:
job-results: ${{ toJSON(needs) }}
workflow-name: ${{ github.workflow }}

View File

@@ -173,17 +173,3 @@ jobs:
generate_release_notes: false
name: Python LanceDB v${{ steps.extract_version.outputs.version }}
body: ${{ steps.python_release_notes.outputs.changelog }}
report-failure:
name: Report Workflow Failure
runs-on: ubuntu-latest
needs: [linux, mac, windows]
permissions:
contents: read
issues: write
if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
steps:
- uses: actions/checkout@v4
- uses: ./.github/actions/create-failure-issue
with:
job-results: ${{ toJSON(needs) }}
workflow-name: ${{ github.workflow }}

View File

@@ -6,7 +6,6 @@ on:
- main
pull_request:
paths:
- Cargo.toml
- python/**
- .github/workflows/python.yml

View File

@@ -125,9 +125,6 @@ jobs:
- name: Run examples
run: cargo run --example simple --locked
- name: Run remote tests
# Running this requires access to secrets, so skip if this is
# a PR from a fork.
if: github.event_name != 'pull_request' || !github.event.pull_request.head.repo.fork
run: make -C ./lancedb remote-tests
macos:

753
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -15,30 +15,30 @@ categories = ["database-implementations"]
rust-version = "1.78.0"
[workspace.dependencies]
lance = { "version" = "=0.38.2", default-features = false, "features" = ["dynamodb"] }
lance-io = { "version" = "=0.38.2", default-features = false }
lance-index = "=0.38.2"
lance-linalg = "=0.38.2"
lance-table = "=0.38.2"
lance-testing = "=0.38.2"
lance-datafusion = "=0.38.2"
lance-encoding = "=0.38.2"
lance-namespace = "0.0.18"
lance = { "version" = "=0.37.0", default-features = false, "features" = ["dynamodb"], "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-io = { "version" = "=0.37.0", default-features = false, "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-index = { "version" = "=0.37.0", "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-linalg = { "version" = "=0.37.0", "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-table = { "version" = "=0.37.0", "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-testing = { "version" = "=0.37.0", "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-datafusion = { "version" = "=0.37.0", "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-encoding = { "version" = "=0.37.0", "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-namespace = "0.0.16"
# Note that this one does not include pyarrow
arrow = { version = "56.2", optional = false }
arrow-array = "56.2"
arrow-data = "56.2"
arrow-ipc = "56.2"
arrow-ord = "56.2"
arrow-schema = "56.2"
arrow-cast = "56.2"
arrow = { version = "55.1", optional = false }
arrow-array = "55.1"
arrow-data = "55.1"
arrow-ipc = "55.1"
arrow-ord = "55.1"
arrow-schema = "55.1"
arrow-cast = "55.1"
async-trait = "0"
datafusion = { version = "50.1", default-features = false }
datafusion-catalog = "50.1"
datafusion-common = { version = "50.1", default-features = false }
datafusion-execution = "50.1"
datafusion-expr = "50.1"
datafusion-physical-plan = "50.1"
datafusion = { version = "49.0", default-features = false }
datafusion-catalog = "49.0"
datafusion-common = { version = "49.0", default-features = false }
datafusion-execution = "49.0"
datafusion-expr = "49.0"
datafusion-physical-plan = "49.0"
env_logger = "0.11"
half = { "version" = "2.6.0", default-features = false, features = [
"num-traits",
@@ -61,14 +61,13 @@ chrono = "=0.4.41"
# Workaround for: https://github.com/Lokathor/bytemuck/issues/306
bytemuck_derive = ">=1.8.1, <1.9.0"
# This is only needed when we reference preview releases of lance
# [patch.crates-io]
# # Force to use the same lance version as the rest of the project to avoid duplicate dependencies
# lance = { "version" = "=0.38.0", "tag" = "v0.38.0", "git" = "https://github.com/lancedb/lance.git" }
# lance-io = { "version" = "=0.38.0", "tag" = "v0.38.0", "git" = "https://github.com/lancedb/lance.git" }
# lance-index = { "version" = "=0.38.0", "tag" = "v0.38.0", "git" = "https://github.com/lancedb/lance.git" }
# lance-linalg = { "version" = "=0.38.0", "tag" = "v0.38.0", "git" = "https://github.com/lancedb/lance.git" }
# lance-table = { "version" = "=0.38.0", "tag" = "v0.38.0", "git" = "https://github.com/lancedb/lance.git" }
# lance-testing = { "version" = "=0.38.0", "tag" = "v0.38.0", "git" = "https://github.com/lancedb/lance.git" }
# lance-datafusion = { "version" = "=0.38.0", "tag" = "v0.38.0", "git" = "https://github.com/lancedb/lance.git" }
# lance-encoding = { "version" = "=0.38.0", "tag" = "v0.38.0", "git" = "https://github.com/lancedb/lance.git" }
[patch.crates-io]
# Force to use the same lance version as the rest of the project to avoid duplicate dependencies
lance = { "version" = "=0.37.0", "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-io = { "version" = "=0.37.0", "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-index = { "version" = "=0.37.0", "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-linalg = { "version" = "=0.37.0", "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-table = { "version" = "=0.37.0", "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-testing = { "version" = "=0.37.0", "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-datafusion = { "version" = "=0.37.0", "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }
lance-encoding = { "version" = "=0.37.0", "tag" = "v0.37.1-beta.1", "git" = "https://github.com/lancedb/lance.git" }

View File

@@ -84,7 +84,6 @@ plugins:
'examples.md': 'https://lancedb.com/docs/tutorials/'
'concepts/vector_search.md': 'https://lancedb.com/docs/search/vector-search/'
'troubleshooting.md': 'https://lancedb.com/docs/troubleshooting/'
'guides/storage.md': 'https://lancedb.com/docs/storage/integrations'
@@ -403,4 +402,4 @@ extra:
- icon: fontawesome/brands/x-twitter
link: https://twitter.com/lancedb
- icon: fontawesome/brands/linkedin
link: https://www.linkedin.com/company/lancedb
link: https://www.linkedin.com/company/lancedb

View File

@@ -52,30 +52,6 @@ the merge result
***
### useIndex()
```ts
useIndex(useIndex): MergeInsertBuilder
```
Controls whether to use indexes for the merge operation.
When set to `true` (the default), the operation will use an index if available
on the join key for improved performance. When set to `false`, it forces a full
table scan even if an index exists. This can be useful for benchmarking or when
the query optimizer chooses a suboptimal path.
#### Parameters
* **useIndex**: `boolean`
Whether to use indices for the merge operation. Defaults to `true`.
#### Returns
[`MergeInsertBuilder`](MergeInsertBuilder.md)
***
### whenMatchedUpdateAll()
```ts

View File

@@ -8,7 +8,7 @@
<parent>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.22.2-beta.2</version>
<version>0.22.2-beta.1</version>
<relativePath>../pom.xml</relativePath>
</parent>

View File

@@ -8,7 +8,7 @@
<parent>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.22.2-beta.2</version>
<version>0.22.2-beta.1</version>
<relativePath>../pom.xml</relativePath>
</parent>

View File

@@ -6,7 +6,7 @@
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.22.2-beta.2</version>
<version>0.22.2-beta.1</version>
<packaging>pom</packaging>
<name>${project.artifactId}</name>
<description>LanceDB Java SDK Parent POM</description>

View File

@@ -1,7 +1,7 @@
[package]
name = "lancedb-nodejs"
edition.workspace = true
version = "0.22.2-beta.2"
version = "0.22.2-beta.1"
license.workspace = true
description.workspace = true
repository.workspace = true

View File

@@ -1,184 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
import * as arrow from "../lancedb/arrow";
import { sanitizeField, sanitizeType } from "../lancedb/sanitize";
describe("sanitize", function () {
describe("sanitizeType function", function () {
it("should handle type objects", function () {
const type = new arrow.Int32();
const result = sanitizeType(type);
expect(result.typeId).toBe(arrow.Type.Int);
expect((result as arrow.Int).bitWidth).toBe(32);
expect((result as arrow.Int).isSigned).toBe(true);
const floatType = {
typeId: 3, // Type.Float = 3
precision: 2,
toString: () => "Float",
isFloat: true,
isFixedWidth: true,
};
const floatResult = sanitizeType(floatType);
expect(floatResult).toBeInstanceOf(arrow.DataType);
expect(floatResult.typeId).toBe(arrow.Type.Float);
const floatResult2 = sanitizeType({ ...floatType, typeId: () => 3 });
expect(floatResult2).toBeInstanceOf(arrow.DataType);
expect(floatResult2.typeId).toBe(arrow.Type.Float);
});
const allTypeNameTestCases = [
["null", new arrow.Null()],
["binary", new arrow.Binary()],
["utf8", new arrow.Utf8()],
["bool", new arrow.Bool()],
["int8", new arrow.Int8()],
["int16", new arrow.Int16()],
["int32", new arrow.Int32()],
["int64", new arrow.Int64()],
["uint8", new arrow.Uint8()],
["uint16", new arrow.Uint16()],
["uint32", new arrow.Uint32()],
["uint64", new arrow.Uint64()],
["float16", new arrow.Float16()],
["float32", new arrow.Float32()],
["float64", new arrow.Float64()],
["datemillisecond", new arrow.DateMillisecond()],
["dateday", new arrow.DateDay()],
["timenanosecond", new arrow.TimeNanosecond()],
["timemicrosecond", new arrow.TimeMicrosecond()],
["timemillisecond", new arrow.TimeMillisecond()],
["timesecond", new arrow.TimeSecond()],
["intervaldaytime", new arrow.IntervalDayTime()],
["intervalyearmonth", new arrow.IntervalYearMonth()],
["durationnanosecond", new arrow.DurationNanosecond()],
["durationmicrosecond", new arrow.DurationMicrosecond()],
["durationmillisecond", new arrow.DurationMillisecond()],
["durationsecond", new arrow.DurationSecond()],
] as const;
it.each(allTypeNameTestCases)(
'should map type name "%s" to %s',
function (name, expected) {
const result = sanitizeType(name);
expect(result).toBeInstanceOf(expected.constructor);
},
);
const caseVariationTestCases = [
["NULL", new arrow.Null()],
["Utf8", new arrow.Utf8()],
["FLOAT32", new arrow.Float32()],
["DaTedAy", new arrow.DateDay()],
] as const;
it.each(caseVariationTestCases)(
'should be case insensitive for type name "%s" mapped to %s',
function (name, expected) {
const result = sanitizeType(name);
expect(result).toBeInstanceOf(expected.constructor);
},
);
it("should throw error for unrecognized type name", function () {
expect(() => sanitizeType("invalid_type")).toThrow(
"Unrecognized type name in schema: invalid_type",
);
});
});
describe("sanitizeField function", function () {
it("should handle field with string type name", function () {
const field = sanitizeField({
name: "string_field",
type: "utf8",
nullable: true,
metadata: new Map([["key", "value"]]),
});
expect(field).toBeInstanceOf(arrow.Field);
expect(field.name).toBe("string_field");
expect(field.type).toBeInstanceOf(arrow.Utf8);
expect(field.nullable).toBe(true);
expect(field.metadata?.get("key")).toBe("value");
});
it("should handle field with type object", function () {
const floatType = {
typeId: 3, // Float
precision: 32,
};
const field = sanitizeField({
name: "float_field",
type: floatType,
nullable: false,
});
expect(field).toBeInstanceOf(arrow.Field);
expect(field.name).toBe("float_field");
expect(field.type).toBeInstanceOf(arrow.DataType);
expect(field.type.typeId).toBe(arrow.Type.Float);
expect((field.type as arrow.Float64).precision).toBe(32);
expect(field.nullable).toBe(false);
});
it("should handle field with direct Type instance", function () {
const field = sanitizeField({
name: "bool_field",
type: new arrow.Bool(),
nullable: true,
});
expect(field).toBeInstanceOf(arrow.Field);
expect(field.name).toBe("bool_field");
expect(field.type).toBeInstanceOf(arrow.Bool);
expect(field.nullable).toBe(true);
});
it("should throw error for invalid field object", function () {
expect(() =>
sanitizeField({
type: "int32",
nullable: true,
}),
).toThrow(
"The field passed in is missing a `type`/`name`/`nullable` property",
);
// Invalid type
expect(() =>
sanitizeField({
name: "invalid",
type: { invalid: true },
nullable: true,
}),
).toThrow("Expected a Type to have a typeId property");
// Invalid nullable
expect(() =>
sanitizeField({
name: "invalid_nullable",
type: "int32",
nullable: "not a boolean",
}),
).toThrow("The field passed in had a non-boolean `nullable` property");
});
it("should report error for invalid type name", function () {
expect(() =>
sanitizeField({
name: "invalid_field",
type: "invalid_type",
nullable: true,
}),
).toThrow(
"Unable to sanitize type for field: invalid_field due to error: Error: Unrecognized type name in schema: invalid_type",
);
});
});
});

View File

@@ -10,13 +10,7 @@ import * as arrow16 from "apache-arrow-16";
import * as arrow17 from "apache-arrow-17";
import * as arrow18 from "apache-arrow-18";
import {
Connection,
MatchQuery,
PhraseQuery,
Table,
connect,
} from "../lancedb";
import { MatchQuery, PhraseQuery, Table, connect } from "../lancedb";
import {
Table as ArrowTable,
Field,
@@ -27,8 +21,6 @@ import {
Int64,
List,
Schema,
SchemaLike,
Type,
Uint8,
Utf8,
makeArrowTable,
@@ -219,7 +211,8 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
},
);
it("should be able to omit nullable fields", async () => {
// TODO: https://github.com/lancedb/lancedb/issues/1832
it.skip("should be able to omit nullable fields", async () => {
const db = await connect(tmpDir.name);
const schema = new arrow.Schema([
new arrow.Field(
@@ -243,36 +236,23 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
await table.add([data3]);
let res = await table.query().limit(10).toArray();
const resVector = res.map((r) =>
r.vector ? Array.from(r.vector) : null,
);
const resVector = res.map((r) => r.get("vector").toArray());
expect(resVector).toEqual([null, data2.vector, data3.vector]);
const resItem = res.map((r) => r.item);
const resItem = res.map((r) => r.get("item").toArray());
expect(resItem).toEqual(["foo", null, "bar"]);
const resPrice = res.map((r) => r.price);
const resPrice = res.map((r) => r.get("price").toArray());
expect(resPrice).toEqual([10.0, 2.0, 3.0]);
const data4 = { item: "foo" };
// We can't omit a column if it's not nullable
await expect(table.add([data4])).rejects.toThrow(
"Append with different schema",
);
await expect(table.add([data4])).rejects.toThrow("Invalid user input");
// But we can alter columns to make them nullable
await table.alterColumns([{ path: "price", nullable: true }]);
await table.add([data4]);
res = (await table.query().limit(10).toArray()).map((r) => ({
...r.toJSON(),
vector: r.vector ? Array.from(r.vector) : null,
}));
// Rust fills missing nullable fields with null
expect(res).toEqual([
{ ...data1, vector: null },
{ ...data2, item: null },
data3,
{ ...data4, price: null, vector: null },
]);
res = (await table.query().limit(10).toArray()).map((r) => r.toJSON());
expect(res).toEqual([data1, data2, data3, data4]);
});
it("should be able to insert nullable data for non-nullable fields", async () => {
@@ -350,43 +330,6 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
const table = await db.createTable("my_table", data);
expect(await table.countRows()).toEqual(2);
});
it("should allow undefined and omitted nullable vector fields", async () => {
// Test for the bug: can't pass undefined or omit vector column
const db = await connect("memory://");
const schema = new arrow.Schema([
new arrow.Field("id", new arrow.Int32(), true),
new arrow.Field(
"vector",
new arrow.FixedSizeList(
32,
new arrow.Field("item", new arrow.Float32(), true),
),
true, // nullable = true
),
]);
const table = await db.createEmptyTable("test_table", schema);
// Should not throw error for undefined value
await table.add([{ id: 0, vector: undefined }]);
// Should not throw error for omitted field
await table.add([{ id: 1 }]);
// Should still work for null
await table.add([{ id: 2, vector: null }]);
// Should still work for actual vector
const testVector = new Array(32).fill(0.5);
await table.add([{ id: 3, vector: testVector }]);
expect(await table.countRows()).toEqual(4);
const res = await table.query().limit(10).toArray();
const resVector = res.map((r) =>
r.vector ? Array.from(r.vector) : null,
);
expect(resVector).toEqual([null, null, null, testVector]);
});
},
);
@@ -1511,9 +1454,7 @@ describe("when optimizing a dataset", () => {
it("delete unverified", async () => {
const version = await table.version();
const versionFile = `${tmpDir.name}/${table.name}.lance/_versions/${
version - 1
}.manifest`;
const versionFile = `${tmpDir.name}/${table.name}.lance/_versions/${version - 1}.manifest`;
fs.rmSync(versionFile);
let stats = await table.optimize({ deleteUnverified: false });
@@ -2027,52 +1968,3 @@ describe("column name options", () => {
expect(results2.length).toBe(10);
});
});
describe("when creating an empty table", () => {
let con: Connection;
beforeEach(async () => {
const tmpDir = tmp.dirSync({ unsafeCleanup: true });
con = await connect(tmpDir.name);
});
afterEach(() => {
con.close();
});
it("can create an empty table from an arrow Schema", async () => {
const schema = new Schema([
new Field("id", new Int64()),
new Field("vector", new Float64()),
]);
const table = await con.createEmptyTable("test", schema);
const actualSchema = await table.schema();
expect(actualSchema.fields[0].type.typeId).toBe(Type.Int);
expect((actualSchema.fields[0].type as Int64).bitWidth).toBe(64);
expect(actualSchema.fields[1].type.typeId).toBe(Type.Float);
expect((actualSchema.fields[1].type as Float64).precision).toBe(2);
});
it("can create an empty table from schema that specifies field types by name", async () => {
const schemaLike = {
fields: [
{
name: "id",
type: "int64",
nullable: true,
},
{
name: "vector",
type: "float64",
nullable: true,
},
],
metadata: new Map(),
names: ["id", "vector"],
} satisfies SchemaLike;
const table = await con.createEmptyTable("test", schemaLike);
const actualSchema = await table.schema();
expect(actualSchema.fields[0].type.typeId).toBe(Type.Int);
expect((actualSchema.fields[0].type as Int64).bitWidth).toBe(64);
expect(actualSchema.fields[1].type.typeId).toBe(Type.Float);
expect((actualSchema.fields[1].type as Float64).precision).toBe(2);
});
});

View File

@@ -73,7 +73,7 @@ export type FieldLike =
| {
type: string;
name: string;
nullable: boolean;
nullable?: boolean;
metadata?: Map<string, string>;
};
@@ -1285,36 +1285,19 @@ function validateSchemaEmbeddings(
if (isFixedSizeList(field.type)) {
field = sanitizeField(field);
if (data.length !== 0 && data?.[0]?.[field.name] === undefined) {
// Check if there's an embedding function registered for this field
let hasEmbeddingFunction = false;
// Check schema metadata for embedding functions
if (schema.metadata.has("embedding_functions")) {
const embeddings = JSON.parse(
schema.metadata.get("embedding_functions")!,
);
// biome-ignore lint/suspicious/noExplicitAny: we don't know the type of `f`
if (embeddings.find((f: any) => f["vectorColumn"] === field.name)) {
hasEmbeddingFunction = true;
}
}
// Check passed embedding function parameter
if (embeddings && embeddings.vectorColumn === field.name) {
hasEmbeddingFunction = true;
}
// If the field is nullable AND there's no embedding function, allow undefined/omitted values
if (field.nullable && !hasEmbeddingFunction) {
fields.push(field);
} else {
// Either not nullable OR has embedding function - require explicit values
if (hasEmbeddingFunction) {
// Don't add to missingEmbeddingFields since this is expected to be filled by embedding function
fields.push(field);
} else {
if (
// biome-ignore lint/suspicious/noExplicitAny: we don't know the type of `f`
embeddings.find((f: any) => f["vectorColumn"] === field.name) ===
undefined
) {
missingEmbeddingFields.push(field);
}
} else {
missingEmbeddingFields.push(field);
}
} else {
fields.push(field);

View File

@@ -326,9 +326,6 @@ export function sanitizeDictionary(typeLike: object) {
// biome-ignore lint/suspicious/noExplicitAny: skip
export function sanitizeType(typeLike: unknown): DataType<any> {
if (typeof typeLike === "string") {
return dataTypeFromName(typeLike);
}
if (typeof typeLike !== "object" || typeLike === null) {
throw Error("Expected a Type but object was null/undefined");
}
@@ -450,7 +447,7 @@ export function sanitizeType(typeLike: unknown): DataType<any> {
case Type.DurationSecond:
return new DurationSecond();
default:
throw new Error("Unrecognized type id in schema: " + typeId);
throw new Error("Unrecoginized type id in schema: " + typeId);
}
}
@@ -470,15 +467,7 @@ export function sanitizeField(fieldLike: unknown): Field {
"The field passed in is missing a `type`/`name`/`nullable` property",
);
}
let type: DataType;
try {
type = sanitizeType(fieldLike.type);
} catch (error: unknown) {
throw Error(
`Unable to sanitize type for field: ${fieldLike.name} due to error: ${error}`,
{ cause: error },
);
}
const type = sanitizeType(fieldLike.type);
const name = fieldLike.name;
if (!(typeof name === "string")) {
throw Error("The field passed in had a non-string `name` property");
@@ -592,46 +581,3 @@ function sanitizeData(
},
);
}
const constructorsByTypeName = {
null: () => new Null(),
binary: () => new Binary(),
utf8: () => new Utf8(),
bool: () => new Bool(),
int8: () => new Int8(),
int16: () => new Int16(),
int32: () => new Int32(),
int64: () => new Int64(),
uint8: () => new Uint8(),
uint16: () => new Uint16(),
uint32: () => new Uint32(),
uint64: () => new Uint64(),
float16: () => new Float16(),
float32: () => new Float32(),
float64: () => new Float64(),
datemillisecond: () => new DateMillisecond(),
dateday: () => new DateDay(),
timenanosecond: () => new TimeNanosecond(),
timemicrosecond: () => new TimeMicrosecond(),
timemillisecond: () => new TimeMillisecond(),
timesecond: () => new TimeSecond(),
intervaldaytime: () => new IntervalDayTime(),
intervalyearmonth: () => new IntervalYearMonth(),
durationnanosecond: () => new DurationNanosecond(),
durationmicrosecond: () => new DurationMicrosecond(),
durationmillisecond: () => new DurationMillisecond(),
durationsecond: () => new DurationSecond(),
} as const;
type MappableTypeName = keyof typeof constructorsByTypeName;
export function dataTypeFromName(typeName: string): DataType {
const normalizedTypeName = typeName.toLowerCase() as MappableTypeName;
const _constructor = constructorsByTypeName[normalizedTypeName];
if (!_constructor) {
throw new Error("Unrecognized type name in schema: " + typeName);
}
return _constructor();
}

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-arm64",
"version": "0.22.2-beta.2",
"version": "0.22.2-beta.1",
"os": ["darwin"],
"cpu": ["arm64"],
"main": "lancedb.darwin-arm64.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-x64",
"version": "0.22.2-beta.2",
"version": "0.22.2-beta.1",
"os": ["darwin"],
"cpu": ["x64"],
"main": "lancedb.darwin-x64.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-gnu",
"version": "0.22.2-beta.2",
"version": "0.22.2-beta.1",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-musl",
"version": "0.22.2-beta.2",
"version": "0.22.2-beta.1",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-musl.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-gnu",
"version": "0.22.2-beta.2",
"version": "0.22.2-beta.1",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-musl",
"version": "0.22.2-beta.2",
"version": "0.22.2-beta.1",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-musl.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-arm64-msvc",
"version": "0.22.2-beta.2",
"version": "0.22.2-beta.1",
"os": [
"win32"
],

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-x64-msvc",
"version": "0.22.2-beta.2",
"version": "0.22.2-beta.1",
"os": ["win32"],
"cpu": ["x64"],
"main": "lancedb.win32-x64-msvc.node",

View File

@@ -1,12 +1,12 @@
{
"name": "@lancedb/lancedb",
"version": "0.22.2-beta.2",
"version": "0.22.2-beta.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "@lancedb/lancedb",
"version": "0.22.2-beta.2",
"version": "0.22.2-beta.0",
"cpu": [
"x64",
"arm64"

View File

@@ -11,7 +11,7 @@
"ann"
],
"private": false,
"version": "0.22.2-beta.2",
"version": "0.22.2-beta.1",
"main": "dist/index.js",
"exports": {
".": "./dist/index.js",

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.25.2"
current_version = "0.25.2-beta.1"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb-python"
version = "0.25.2"
version = "0.25.2-beta.1"
edition.workspace = true
description = "Python bindings for LanceDB"
license.workspace = true
@@ -14,12 +14,12 @@ name = "_lancedb"
crate-type = ["cdylib"]
[dependencies]
arrow = { version = "56.2", features = ["pyarrow"] }
arrow = { version = "55.1", features = ["pyarrow"] }
async-trait = "0.1"
lancedb = { path = "../rust/lancedb", default-features = false }
env_logger.workspace = true
pyo3 = { version = "0.25", features = ["extension-module", "abi3-py39"] }
pyo3-async-runtimes = { version = "0.25", features = [
pyo3 = { version = "0.24", features = ["extension-module", "abi3-py39"] }
pyo3-async-runtimes = { version = "0.24", features = [
"attributes",
"tokio-runtime",
] }
@@ -28,7 +28,7 @@ futures.workspace = true
tokio = { version = "1.40", features = ["sync"] }
[build-dependencies]
pyo3-build-config = { version = "0.25", features = [
pyo3-build-config = { version = "0.24", features = [
"extension-module",
"abi3-py39",
] }

View File

@@ -5,7 +5,7 @@ dynamic = ["version"]
dependencies = [
"deprecation",
"numpy",
"overrides>=0.7; python_version<'3.12'",
"overrides>=0.7",
"packaging",
"pyarrow>=16",
"pydantic>=1.10",

View File

@@ -133,7 +133,6 @@ class Tags:
async def update(self, tag: str, version: int): ...
class IndexConfig:
name: str
index_type: str
columns: List[str]

View File

@@ -6,18 +6,10 @@ from __future__ import annotations
from abc import abstractmethod
from pathlib import Path
import sys
from typing import TYPE_CHECKING, Dict, Iterable, List, Literal, Optional, Union
if sys.version_info >= (3, 12):
from typing import override
class EnforceOverrides:
pass
else:
from overrides import EnforceOverrides, override # type: ignore
from lancedb.embeddings.registry import EmbeddingFunctionRegistry
from overrides import EnforceOverrides, override # type: ignore
from lancedb.common import data_to_reader, sanitize_uri, validate_schema
from lancedb.background_loop import LOOP

View File

@@ -12,18 +12,13 @@ from __future__ import annotations
from typing import Dict, Iterable, List, Optional, Union
import os
import sys
if sys.version_info >= (3, 12):
from typing import override
else:
from overrides import override
from lancedb.db import DBConnection
from lancedb.table import LanceTable, Table
from lancedb.util import validate_table_name
from lancedb.common import validate_schema
from lancedb.table import sanitize_create_table
from overrides import override
from lance_namespace import LanceNamespace, connect as namespace_connect
from lance_namespace_urllib3_client.models import (

View File

@@ -5,20 +5,15 @@
from datetime import timedelta
import logging
from concurrent.futures import ThreadPoolExecutor
import sys
from typing import Any, Dict, Iterable, List, Optional, Union
from urllib.parse import urlparse
import warnings
if sys.version_info >= (3, 12):
from typing import override
else:
from overrides import override
# Remove this import to fix circular dependency
# from lancedb import connect_async
from lancedb.remote import ClientConfig
import pyarrow as pa
from overrides import override
from ..common import DATA
from ..db import DBConnection, LOOP

View File

@@ -114,7 +114,7 @@ class RemoteTable(Table):
index_type: Literal["BTREE", "BITMAP", "LABEL_LIST", "scalar"] = "scalar",
*,
replace: bool = False,
wait_timeout: Optional[timedelta] = None,
wait_timeout: timedelta = None,
name: Optional[str] = None,
):
"""Creates a scalar index
@@ -153,7 +153,7 @@ class RemoteTable(Table):
column: str,
*,
replace: bool = False,
wait_timeout: Optional[timedelta] = None,
wait_timeout: timedelta = None,
with_position: bool = False,
# tokenizer configs:
base_tokenizer: str = "simple",

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb"
version = "0.22.2-beta.2"
version = "0.22.2-beta.1"
edition.workspace = true
description = "LanceDB: A serverless, low-latency vector database for AI applications"
license.workspace = true

View File

@@ -52,13 +52,13 @@ pub fn infer_vector_columns(
for field in reader.schema().fields() {
match field.data_type() {
DataType::FixedSizeList(sub_field, _) if sub_field.data_type().is_floating() => {
columns.push(field.name().clone());
columns.push(field.name().to_string());
}
DataType::List(sub_field) if sub_field.data_type().is_floating() && !strict => {
columns_to_infer.insert(field.name().clone(), None);
columns_to_infer.insert(field.name().to_string(), None);
}
DataType::LargeList(sub_field) if sub_field.data_type().is_floating() && !strict => {
columns_to_infer.insert(field.name().clone(), None);
columns_to_infer.insert(field.name().to_string(), None);
}
_ => {}
}

View File

@@ -718,9 +718,9 @@ impl Database for ListingDatabase {
.map_err(|e| Error::Lance { source: e })?;
let version_ref = match (request.source_version, request.source_tag) {
(Some(v), None) => Ok(Ref::Version(None, Some(v))),
(Some(v), None) => Ok(Ref::Version(v)),
(None, Some(tag)) => Ok(Ref::Tag(tag)),
(None, None) => Ok(Ref::Version(None, Some(source_dataset.version().version))),
(None, None) => Ok(Ref::Version(source_dataset.version().version)),
_ => Err(Error::InvalidInput {
message: "Cannot specify both source_version and source_tag".to_string(),
}),

View File

@@ -261,7 +261,7 @@ impl Database for LanceNamespaceDatabase {
return listing_db
.open_table(OpenTableRequest {
name: request.name.clone(),
namespace: vec![],
namespace: request.namespace.clone(),
index_cache_size: None,
lance_read_params: None,
})
@@ -305,14 +305,7 @@ impl Database for LanceNamespaceDatabase {
)
.await?;
let create_request = DbCreateTableRequest {
name: request.name,
namespace: vec![],
data: request.data,
mode: request.mode,
write_options: request.write_options,
};
listing_db.create_table(create_request).await
listing_db.create_table(request).await
}
async fn open_table(&self, request: OpenTableRequest) -> Result<Arc<dyn BaseTable>> {
@@ -339,13 +332,7 @@ impl Database for LanceNamespaceDatabase {
.create_listing_database(&request.name, &location, response.storage_options)
.await?;
let open_request = OpenTableRequest {
name: request.name.clone(),
namespace: vec![],
index_cache_size: request.index_cache_size,
lance_read_params: request.lance_read_params,
};
listing_db.open_table(open_request).await
listing_db.open_table(request).await
}
async fn clone_table(&self, _request: CloneTableRequest) -> Result<Arc<dyn BaseTable>> {

View File

@@ -647,7 +647,7 @@ impl From<StorageOptions> for RemoteOptions {
let mut filtered = HashMap::new();
for opt in supported_opts {
if let Some(v) = options.0.get(opt) {
filtered.insert(opt.to_string(), v.clone());
filtered.insert(opt.to_string(), v.to_string());
}
}
Self::new(filtered)

View File

@@ -1383,35 +1383,30 @@ impl Table {
}
pub struct NativeTags {
dataset: dataset::DatasetConsistencyWrapper,
inner: LanceTags,
}
#[async_trait]
impl Tags for NativeTags {
async fn list(&self) -> Result<HashMap<String, TagContents>> {
let dataset = self.dataset.get().await?;
Ok(dataset.tags().list().await?)
Ok(self.inner.list().await?)
}
async fn get_version(&self, tag: &str) -> Result<u64> {
let dataset = self.dataset.get().await?;
Ok(dataset.tags().get_version(tag).await?)
Ok(self.inner.get_version(tag).await?)
}
async fn create(&mut self, tag: &str, version: u64) -> Result<()> {
let dataset = self.dataset.get().await?;
dataset.tags().create(tag, version).await?;
self.inner.create(tag, version).await?;
Ok(())
}
async fn delete(&mut self, tag: &str) -> Result<()> {
let dataset = self.dataset.get().await?;
dataset.tags().delete(tag).await?;
self.inner.delete(tag).await?;
Ok(())
}
async fn update(&mut self, tag: &str, version: u64) -> Result<()> {
let dataset = self.dataset.get().await?;
dataset.tags().update(tag, version).await?;
self.inner.update(tag, version).await?;
Ok(())
}
}
@@ -1785,13 +1780,13 @@ impl NativeTable {
BuiltinIndexType::BTree,
)))
} else {
Err(Error::InvalidInput {
return Err(Error::InvalidInput {
message: format!(
"there are no indices supported for the field `{}` with the data type {}",
field.name(),
field.data_type()
),
})?
});
}
}
Index::BTree(_) => {
@@ -2445,8 +2440,10 @@ impl BaseTable for NativeTable {
}
async fn tags(&self) -> Result<Box<dyn Tags + '_>> {
let dataset = self.dataset.get().await?;
Ok(Box::new(NativeTags {
dataset: self.dataset.clone(),
inner: dataset.tags.clone(),
}))
}

View File

@@ -172,7 +172,7 @@ impl TableProvider for BaseTableAdapter {
if let Some(projection) = projection {
let field_names = projection
.iter()
.map(|i| self.schema.field(*i).name().clone())
.map(|i| self.schema.field(*i).name().to_string())
.collect();
query.select = Select::Columns(field_names);
}

View File

@@ -98,9 +98,8 @@ impl DatasetRef {
}
Self::TimeTravel { dataset, version } => {
let should_checkout = match &target_ref {
refs::Ref::Version(_, Some(target_ver)) => version != target_ver,
refs::Ref::Version(_, None) => true, // No specific version, always checkout
refs::Ref::Tag(_) => true, // Always checkout for tags
refs::Ref::Version(target_ver) => version != target_ver,
refs::Ref::Tag(_) => true, // Always checkout for tags
};
if should_checkout {

View File

@@ -174,7 +174,7 @@ pub(crate) fn default_vector_column(schema: &Schema, dim: Option<i32>) -> Result
),
})
} else {
Ok(candidates[0].clone())
Ok(candidates[0].to_string())
}
}