more edit

This commit is contained in:
qzhu
2024-11-14 13:33:25 -08:00
parent 955a295026
commit a503845c9f
4 changed files with 243 additions and 7 deletions

View File

@@ -7,6 +7,8 @@ ensuring a seamless transition for existing OSS users.
=== "Python"
```python
--8<-- "python/python/tests/docs/test_cloud.py:import-ingest-data"
--8<-- "python/python/tests/docs/test_cloud.py:ingest_data"
```
=== "Typescript"

View File

@@ -22,7 +22,8 @@ excluded_globs = [
"../src/embeddings/available_embedding_models/text_embedding_functions/*.md",
"../src/embeddings/available_embedding_models/multimodal_embedding_functions/*.md",
"../src/rag/*.md",
"../src/rag/advanced_techniques/*.md"
"../src/rag/advanced_techniques/*.md",
"../src/cloud/*.md"
]

View File

@@ -0,0 +1,230 @@
// --8<-- [start:imports]
import * as lancedb from "@lancedb/lancedb";
// --8<-- [end:imports]
// --8<-- [start:generate_data]
/**
 * Generates synthetic rows for the examples.
 *
 * Row `i` carries `id` = `i`, `name` = `name_<i>`, and a `vector` whose
 * `j`-th component is `i + j * 0.1`.
 *
 * @param numRows - number of rows to generate
 * @param numVectorDim - number of components in each row's vector
 * @returns the generated rows, in ascending `id` order
 */
function genData(numRows: number, numVectorDim: number): any[] {
  // Builds the vector for a single row.
  const buildVector = (rowIndex: number): number[] => {
    const components: number[] = [];
    let dim = 0;
    while (dim < numVectorDim) {
      components.push(rowIndex + dim * 0.1);
      dim += 1;
    }
    return components;
  };

  const rows: any[] = [];
  for (let rowIndex = 0; rowIndex < numRows; rowIndex += 1) {
    rows.push({
      id: rowIndex,
      name: `name_${rowIndex}`,
      vector: buildVector(rowIndex),
    });
  }
  return rows;
}
// --8<-- [end:generate_data]
// Quickstart walkthrough: connect, create a table from generated rows,
// build a vector index, run one search, then drop the table.
// The connection values are placeholders; real credentials are required to run.
test("cloud quickstart", async () => {
  // --8<-- [start:connect]
  const db = await lancedb.connect({
    uri: "db://your-project-slug",
    apiKey: "your-api-key",
    region: "your-cloud-region",
  });
  // --8<-- [end:connect]
  // --8<-- [start:create_table]
  const tableName = "myTable";
  const data = genData(5000, 1536);
  const table = await db.createTable(tableName, data);
  // --8<-- [end:create_table]
  // --8<-- [start:create_index_search]
  // create a vector index
  await table.createIndex("vector", {
    config: lancedb.Index.ivfPq({ distanceType: "cosine" }),
  });
  // the query vector must have the same dimension as the stored vectors
  const queryVector = Array.from({ length: 1536 }, () => 0.01);
  const result = await table
    .search(queryVector)
    .select(["vector", "name"])
    .limit(1)
    .toArray();
  console.log(result);
  // --8<-- [end:create_index_search]
  // --8<-- [start:drop_table]
  await db.dropTable(tableName);
  // --8<-- [end:drop_table]
});
// Creates an empty table from an explicit Arrow schema, then appends rows.
// The connection values are placeholders; real credentials are required to run.
test("ingest data", async () => {
  // --8<-- [start:ingest_data]
  // Static `import` declarations are only legal at module top level, so the
  // Arrow types are loaded dynamically to keep this snippet self-contained.
  const { Schema, Field, Float32, FixedSizeList, Utf8 } = await import(
    "apache-arrow"
  );
  const db = await lancedb.connect({
    uri: "db://your-project-slug",
    apiKey: "your-api-key",
    region: "us-east-1",
  });
  const data = [
    { vector: [3.1, 4.1], item: "foo", price: 10.0 },
    { vector: [5.9, 26.5], item: "bar", price: 20.0 },
    { vector: [10.2, 100.8], item: "baz", price: 30.0 },
    { vector: [1.4, 9.5], item: "fred", price: 40.0 },
  ];
  // create an empty table with schema
  const schema = new Schema([
    new Field(
      "vector",
      new FixedSizeList(2, new Field("float32", new Float32())),
    ),
    new Field("item", new Utf8()),
    new Field("price", new Float32()),
  ]);
  const tableName = "myTable";
  // no rows are supplied here, so the table is created empty from the schema
  const table = await db.createEmptyTable(tableName, schema);
  await table.add(data);
  // --8<-- [end:ingest_data]
});
// Opens an existing table and demonstrates update, upsert (merge insert),
// and delete. Assumes "myTable" already exists with `vector`, `item` and
// `price` columns, as created by the ingest example.
test("update data", async () => {
  // --8<-- [start:connect_db_and_open_table]
  const db = await lancedb.connect({
    uri: "db://your-project-slug",
    apiKey: "your-api-key",
    region: "us-east-1",
  });
  const tableName = "myTable";
  const table = await db.openTable(tableName);
  // --8<-- [end:connect_db_and_open_table]
  // --8<-- [start:update_data]
  await table.update({
    where: "price < 20.0",
    values: { vector: [2, 2], item: "foo-updated" },
  });
  // --8<-- [end:update_data]
  // --8<-- [start:merge_insert]
  const newData = [{ vector: [1, 1], item: "foo-updated", price: 50.0 }];
  // upsert: rows whose `item` matches are updated, all others are inserted
  await table
    .mergeInsert("item")
    .whenMatchedUpdateAll()
    .whenNotMatchedInsertAll()
    .execute(newData);
  // --8<-- [end:merge_insert]
  // --8<-- [start:delete_data]
  // delete data
  const predicate = "price = 30.0";
  await table.delete(predicate);
  // --8<-- [end:delete_data]
});
// Demonstrates the three index kinds: vector, scalar, and full-text search.
// The connection values are placeholders; real credentials are required to run.
test("create index", async () => {
  const db = await lancedb.connect({
    uri: "db://your-project-slug",
    apiKey: "your-api-key",
    region: "us-east-1",
  });
  const tableName = "myTable";
  const table = await db.openTable(tableName);
  // --8<-- [start:create_index]
  // create a vector index on the "vector" column
  // L2 is used as the default distance metric; cosine is requested here
  await table.createIndex("vector", {
    config: lancedb.Index.ivfPq({ distanceType: "cosine" }),
  });
  // --8<-- [end:create_index]
  // --8<-- [start:create_scalar_index]
  await table.createIndex("item", {
    config: lancedb.Index.btree(),
  });
  // --8<-- [end:create_scalar_index]
  {
    // The full-text snippet is self-contained and declares its own `db`,
    // `tableName` and `table`, so it lives in its own block scope to avoid
    // redeclaring the constants above.
    // --8<-- [start:create_fts_index]
    const db = await lancedb.connect({
      uri: "db://your-project-slug",
      apiKey: "your-api-key",
      region: "us-east-1",
    });
    const tableName = "myTable";
    const data = [
      { vector: [3.1, 4.1], text: "Frodo was a happy puppy" },
      { vector: [5.9, 26.5], text: "There are several kittens playing" },
    ];
    const table = await db.createTable(tableName, data);
    await table.createIndex("text", {
      config: lancedb.Index.fts(),
    });
    // --8<-- [end:create_fts_index]
  }
});
// Runs a filtered vector search against an existing table.
// Assumes "myTable" already exists with 2-dimensional vectors plus `item`
// and `price` columns, as created by the ingest example.
test("vector search", async () => {
  // --8<-- [start:vector_search]
  const db = await lancedb.connect({
    uri: "db://your-project-slug",
    apiKey: "your-api-key",
    region: "us-east-1",
  });
  const tableName = "myTable";
  const table = await db.openTable(tableName);
  // the `where` filter is applied before the vector search (prefiltering)
  // unless `.postfilter()` is requested
  const result = await table
    .search([0.4, 1.4])
    .where("price > 10.0")
    .select(["item", "vector"])
    .limit(2)
    .toArray();
  console.log(result);
  // --8<-- [end:vector_search]
});
// Creates a small text table, builds a full-text index on `text`, and runs
// a keyword query against it.
// The connection values are placeholders; real credentials are required to run.
test("full-text search", async () => {
  // --8<-- [start:full_text_search]
  const db = await lancedb.connect({
    uri: "db://your-project-slug",
    apiKey: "your-api-key",
    region: "us-east-1",
  });
  const data = [
    { vector: [3.1, 4.1], text: "Frodo was a happy puppy" },
    { vector: [5.9, 26.5], text: "There are several kittens playing" },
  ];
  const tableName = "myTable";
  const table = await db.createTable(tableName, data);
  await table.createIndex("text", {
    config: lancedb.Index.fts(),
  });
  // the second argument selects full-text search for the string query
  const result = await table
    .search("puppy", "fts")
    .select(["text"])
    .limit(10)
    .toArray();
  console.log(result);
  // --8<-- [end:full_text_search]
});
// Vector search combined with a SQL-style metadata filter applied after the
// search (post-filtering). Both snippets issue the same query.
// Assumes "myTable" already exists with `item` and `price` columns.
test("metadata filtering", async () => {
  // --8<-- [start:filtering]
  const db = await lancedb.connect({
    uri: "db://your-project-slug",
    apiKey: "your-api-key",
    region: "us-east-1",
  });
  const tableName = "myTable";
  const table = await db.openTable(tableName);
  const filteredQuery = table
    .search(Array(2).fill(0.1))
    .where("(item IN ('foo', 'bar')) AND (price > 10.0)")
    .postfilter();
  await filteredQuery.toArray();
  // --8<-- [end:filtering]
  // --8<-- [start:sql_filtering]
  const sqlFilteredQuery = table
    .search(Array(2).fill(0.1))
    .where("(item IN ('foo', 'bar')) AND (price > 10.0)")
    .postfilter();
  await sqlFilteredQuery.toArray();
  // --8<-- [end:sql_filtering]
});

View File

@@ -1,9 +1,12 @@
# --8<-- [start:imports]
# --8<-- [start:import-lancedb]
# --8<-- [start:import-ingest-data]
import lancedb
import pyarrow as pa
# --8<-- [end:import-ingest-data]
import numpy as np
# --8<-- [end:import-lancedb]
# --8<-- [end:imports]
# --8<-- [start:gen_data]
def gen_data(total_rows: int, ndims: int = 1536):
@@ -36,6 +39,7 @@ def test_cloud_quickstart():
# create a vector index
table.create_index("cosine", vector_column_name="vector")
result = table.search([0.01, 0.02]).select(["vector", "item"]).limit(1).to_pandas()
print(result)
# --8<-- [end:create_index_search]
# --8<-- [start:drop_table]
db.drop_table(table_name)
@@ -44,9 +48,6 @@ def test_cloud_quickstart():
def test_ingest_data():
# --8<-- [start:ingest_data]
import lancedb
import pyarrow as pa
# connect to LanceDB
db = lancedb.connect(
uri="db://your-project-slug", api_key="your-api-key", region="us-east-1"
@@ -69,7 +70,6 @@ def test_ingest_data():
)
table = db.create_table(table_name, schema=schema)
table.add(data)
# --8<-- [end:ingest_data]
# --8<-- [start:ingest_data_in_batch]
def make_batches():
@@ -195,6 +195,7 @@ def test_search():
.limit(2)
.to_pandas()
)
print(result)
# --8<-- [end:vector_search]
# --8<-- [start:full_text_search]
import lancedb
@@ -253,7 +254,7 @@ def test_search():
# you can use table.list_indices() to make sure indices have been created
reranker = RRFReranker()
results = (
result = (
table.search(
"flower moon",
query_type="hybrid",
@@ -264,6 +265,7 @@ def test_search():
.limit(10)
.to_pandas()
)
print(result)
# --8<-- [end:hybrid_search]
@@ -282,6 +284,7 @@ def test_filtering():
.where("(item IN ('foo', 'bar')) AND (price > 10.0)")
.to_arrow()
)
print(result)
# --8<-- [end:filtering]
# --8<-- [start:sql_filtering]
table.search([100, 102]).where(