From a503845c9f2ef723f9cdf9d9433a2fa1fc0f741c Mon Sep 17 00:00:00 2001
From: qzhu
Date: Thu, 14 Nov 2024 13:33:25 -0800
Subject: [PATCH] more edit

---
Reviewer notes (free-form text in this position is skipped by `git am`):
- The patch had lost its line breaks and indentation. Blank context lines and
  leading whitespace were re-inserted to satisfy the hunk line counts; run
  `git apply --check` against the target tree to confirm the context matches.
- nodejs/examples/cloud.test.ts was corrected to compile and to use the
  @lancedb/lancedb API: the apache-arrow import is hoisted to module scope,
  the FTS index example lives in its own test (no duplicate `const`), index
  creation goes through `createIndex(column, { config })`, upserts use the
  `mergeInsert` builder, empty tables use `createEmptyTable`, and queries end
  in `toArray()`. The file is now 236 lines, so the blob id on its `index`
  line no longer matches the content.

 docs/src/cloud/ingest_data.md          |   2 +
 docs/test/md_testing.py                |   3 +-
 nodejs/examples/cloud.test.ts          | 236 +++++++++++++++++++++++++
 python/python/tests/docs/test_cloud.py |  15 +-
 4 files changed, 249 insertions(+), 7 deletions(-)
 create mode 100644 nodejs/examples/cloud.test.ts

diff --git a/docs/src/cloud/ingest_data.md b/docs/src/cloud/ingest_data.md
index 183af080..f0f302a2 100644
--- a/docs/src/cloud/ingest_data.md
+++ b/docs/src/cloud/ingest_data.md
@@ -7,6 +7,8 @@ ensuring a seamless transition for existing OSS users.
 === "Python"
 
     ```python
+    --8<-- "python/python/tests/docs/test_cloud.py:import-ingest-data"
+    --8<-- "python/python/tests/docs/test_cloud.py:ingest_data"
     ```
 
 === "Typescript"
diff --git a/docs/test/md_testing.py b/docs/test/md_testing.py
index 871c2ccd..1971c153 100755
--- a/docs/test/md_testing.py
+++ b/docs/test/md_testing.py
@@ -22,7 +22,8 @@ excluded_globs = [
     "../src/embeddings/available_embedding_models/text_embedding_functions/*.md",
     "../src/embeddings/available_embedding_models/multimodal_embedding_functions/*.md",
     "../src/rag/*.md",
-    "../src/rag/advanced_techniques/*.md"
+    "../src/rag/advanced_techniques/*.md",
+    "../src/cloud/*.md"
 ]
 
 
diff --git a/nodejs/examples/cloud.test.ts b/nodejs/examples/cloud.test.ts
new file mode 100644
index 00000000..f85d3581
--- /dev/null
+++ b/nodejs/examples/cloud.test.ts
@@ -0,0 +1,236 @@
+// --8<-- [start:imports]
+import * as lancedb from "@lancedb/lancedb";
+// --8<-- [end:imports]
+// --8<-- [start:import-ingest-data]
+import { Schema, Field, Float32, FixedSizeList, Utf8 } from "apache-arrow";
+// --8<-- [end:import-ingest-data]
+
+// --8<-- [start:generate_data]
+type ExampleRow = { id: number; name: string; vector: number[] };
+
+/**
+ * Build `numRows` rows; row `i` gets a `numVectorDim`-long vector whose
+ * j-th component is `i + j * 0.1`.
+ */
+function genData(numRows: number, numVectorDim: number): ExampleRow[] {
+  const data: ExampleRow[] = [];
+  for (let i = 0; i < numRows; i++) {
+    const vector: number[] = [];
+    for (let j = 0; j < numVectorDim; j++) {
+      vector.push(i + j * 0.1);
+    }
+    data.push({
+      id: i,
+      name: `name_${i}`,
+      vector,
+    });
+  }
+  return data;
+}
+// --8<-- [end:generate_data]
+
+test("cloud quickstart", async () => {
+  // --8<-- [start:connect]
+  const db = await lancedb.connect({
+    uri: "db://your-project-slug",
+    apiKey: "your-api-key",
+    region: "your-cloud-region",
+  });
+  // --8<-- [end:connect]
+  // --8<-- [start:create_table]
+  const tableName = "myTable";
+  const data = genData(5000, 1536);
+  const table = await db.createTable(tableName, data);
+  // --8<-- [end:create_table]
+  // --8<-- [start:create_index_search]
+  // create a vector index
+  await table.createIndex("vector", {
+    config: lancedb.Index.ivfPq({ distanceType: "cosine" }),
+  });
+  // the query vector must match the 1536 dimensions of the stored vectors
+  const result = await table
+    .vectorSearch(new Array<number>(1536).fill(0.01))
+    .select(["id", "name"])
+    .limit(1)
+    .toArray();
+  console.log(result);
+  // --8<-- [end:create_index_search]
+  // --8<-- [start:drop_table]
+  await db.dropTable(tableName);
+  // --8<-- [end:drop_table]
+});
+
+test("ingest data", async () => {
+  // --8<-- [start:ingest_data]
+  const db = await lancedb.connect({
+    uri: "db://your-project-slug",
+    apiKey: "your-api-key",
+    region: "us-east-1",
+  });
+
+  const data = [
+    { vector: [3.1, 4.1], item: "foo", price: 10.0 },
+    { vector: [5.9, 26.5], item: "bar", price: 20.0 },
+    { vector: [10.2, 100.8], item: "baz", price: 30.0 },
+    { vector: [1.4, 9.5], item: "fred", price: 40.0 },
+  ];
+  // create an empty table with schema, then append the rows
+  const schema = new Schema([
+    new Field(
+      "vector",
+      new FixedSizeList(2, new Field("float32", new Float32())),
+    ),
+    new Field("item", new Utf8()),
+    new Field("price", new Float32()),
+  ]);
+  const tableName = "myTable";
+  const table = await db.createEmptyTable(tableName, schema);
+  await table.add(data);
+  // --8<-- [end:ingest_data]
+});
+
+test("update data", async () => {
+  // --8<-- [start:connect_db_and_open_table]
+  const db = await lancedb.connect({
+    uri: "db://your-project-slug",
+    apiKey: "your-api-key",
+    region: "us-east-1",
+  });
+  const tableName = "myTable";
+  const table = await db.openTable(tableName);
+  // --8<-- [end:connect_db_and_open_table]
+  // --8<-- [start:update_data]
+  await table.update({
+    where: "price < 20.0",
+    values: { vector: [2, 2], item: "foo-updated" },
+  });
+  // --8<-- [end:update_data]
+  // --8<-- [start:merge_insert]
+  const newData = [{ vector: [1, 1], item: "foo-updated", price: 50.0 }];
+  // upsert: update rows whose `item` matches, insert the others
+  await table
+    .mergeInsert("item")
+    .whenMatchedUpdateAll()
+    .whenNotMatchedInsertAll()
+    .execute(newData);
+  // --8<-- [end:merge_insert]
+  // --8<-- [start:delete_data]
+  // delete data
+  const predicate = "price = 30.0";
+  await table.delete(predicate);
+  // --8<-- [end:delete_data]
+});
+
+test("create index", async () => {
+  const db = await lancedb.connect({
+    uri: "db://your-project-slug",
+    apiKey: "your-api-key",
+    region: "us-east-1",
+  });
+
+  const tableName = "myTable";
+  const table = await db.openTable(tableName);
+  // --8<-- [start:create_index]
+  // L2 is used as the default distance metric; ask for cosine explicitly
+  await table.createIndex("vector", {
+    config: lancedb.Index.ivfPq({ distanceType: "cosine" }),
+  });
+  // --8<-- [end:create_index]
+  // --8<-- [start:create_scalar_index]
+  await table.createIndex("item", {
+    config: lancedb.Index.btree(),
+  });
+  // --8<-- [end:create_scalar_index]
+});
+
+test("create fts index", async () => {
+  // --8<-- [start:create_fts_index]
+  const db = await lancedb.connect({
+    uri: "db://your-project-slug",
+    apiKey: "your-api-key",
+    region: "us-east-1",
+  });
+
+  const tableName = "myTable";
+  const data = [
+    { vector: [3.1, 4.1], text: "Frodo was a happy puppy" },
+    { vector: [5.9, 26.5], text: "There are several kittens playing" },
+  ];
+  const table = await db.createTable(tableName, data);
+  await table.createIndex("text", {
+    config: lancedb.Index.fts(),
+  });
+  // --8<-- [end:create_fts_index]
+});
+
+test("vector search", async () => {
+  // --8<-- [start:vector_search]
+  const db = await lancedb.connect({
+    uri: "db://your-project-slug",
+    apiKey: "your-api-key",
+    region: "us-east-1",
+  });
+
+  const tableName = "myTable";
+  const table = await db.openTable(tableName);
+  // the filter is applied before the vector search (prefilter) by default
+  const result = await table
+    .vectorSearch([0.4, 1.4])
+    .where("price > 10.0")
+    .select(["item", "vector"])
+    .limit(2)
+    .toArray();
+  console.log(result);
+  // --8<-- [end:vector_search]
+});
+
+test("full-text search", async () => {
+  // --8<-- [start:full_text_search]
+  const db = await lancedb.connect({
+    uri: "db://your-project-slug",
+    apiKey: "your-api-key",
+    region: "us-east-1",
+  });
+
+  const data = [
+    { vector: [3.1, 4.1], text: "Frodo was a happy puppy" },
+    { vector: [5.9, 26.5], text: "There are several kittens playing" },
+  ];
+  const tableName = "myTable";
+  const table = await db.createTable(tableName, data);
+  await table.createIndex("text", {
+    config: lancedb.Index.fts(),
+  });
+
+  const result = await table
+    .search("puppy", "fts")
+    .select(["text"])
+    .limit(10)
+    .toArray();
+  console.log(result);
+  // --8<-- [end:full_text_search]
+});
+
+test("metadata filtering", async () => {
+  // --8<-- [start:filtering]
+  const db = await lancedb.connect({
+    uri: "db://your-project-slug",
+    apiKey: "your-api-key",
+    region: "us-east-1",
+  });
+  const tableName = "myTable";
+  const table = await db.openTable(tableName);
+  await table
+    .vectorSearch(Array(2).fill(0.1))
+    .where("(item IN ('foo', 'bar')) AND (price > 10.0)")
+    .postfilter()
+    .toArray();
+  // --8<-- [end:filtering]
+  // --8<-- [start:sql_filtering]
+  await table
+    .vectorSearch(Array(2).fill(0.1))
+    .where("(item IN ('foo', 'bar')) AND (price > 10.0)")
+    .postfilter()
+    .toArray();
+  // --8<-- [end:sql_filtering]
+});
diff --git a/python/python/tests/docs/test_cloud.py b/python/python/tests/docs/test_cloud.py
index 6fd65e07..a8868474 100644
--- a/python/python/tests/docs/test_cloud.py
+++ b/python/python/tests/docs/test_cloud.py
@@ -1,9 +1,12 @@
 # --8<-- [start:imports]
+# --8<-- [start:import-lancedb]
+# --8<-- [start:import-ingest-data]
 import lancedb
 import pyarrow as pa
+# --8<-- [end:import-ingest-data]
 import numpy as np
 
-
+# --8<-- [end:import-lancedb]
 # --8<-- [end:imports]
 # --8<-- [start:gen_data]
 def gen_data(total_rows: int, ndims: int = 1536):
@@ -36,6 +39,7 @@ def test_cloud_quickstart():
     # create a vector index
     table.create_index("cosine", vector_column_name="vector")
     result = table.search([0.01, 0.02]).select(["vector", "item"]).limit(1).to_pandas()
+    print(result)
     # --8<-- [end:create_index_search]
     # --8<-- [start:drop_table]
     db.drop_table(table_name)
@@ -44,9 +48,6 @@ def test_cloud_quickstart():
 
 def test_ingest_data():
     # --8<-- [start:ingest_data]
-    import lancedb
-    import pyarrow as pa
-
     # connect to LanceDB
     db = lancedb.connect(
         uri="db://your-project-slug", api_key="your-api-key", region="us-east-1"
@@ -69,7 +70,6 @@ def test_ingest_data():
     )
     table = db.create_table(table_name, schema=schema)
     table.add(data)
-
     # --8<-- [end:ingest_data]
     # --8<-- [start:ingest_data_in_batch]
     def make_batches():
@@ -195,6 +195,7 @@ def test_search():
         .limit(2)
         .to_pandas()
     )
+    print(result)
     # --8<-- [end:vector_search]
     # --8<-- [start:full_text_search]
     import lancedb
@@ -253,7 +254,7 @@ def test_search():
 
     # you can use table.list_indices() to make sure indices have been created
     reranker = RRFReranker()
-    results = (
+    result = (
         table.search(
             "flower moon",
             query_type="hybrid",
@@ -264,6 +265,7 @@ def test_search():
         .limit(10)
         .to_pandas()
     )
+    print(result)
     # --8<-- [end:hybrid_search]
 
 
@@ -282,6 +284,7 @@ def test_filtering():
         .where("(item IN ('foo', 'bar')) AND (price > 10.0)")
         .to_arrow()
     )
+    print(result)
     # --8<-- [end:filtering]
     # --8<-- [start:sql_filtering]
     table.search([100, 102]).where(