doc: use code snippet for typescript examples (#880)

The TypeScript code lives in a fully functional file that is run by the CI.
This commit is contained in:
Lei Xu
2024-01-27 22:52:37 -08:00
committed by Weston Pace
parent 545a03d7f9
commit d811b89de2
12 changed files with 281 additions and 49 deletions

View File

@@ -56,7 +56,7 @@ Lance supports `IVF_PQ` index type by default.
data.push({vector: Array(1536).fill(i), id: `${i}`, content: "", longId: `${i}`},)
}
const table = await db.createTable('my_vectors', data)
await table.createIndex({ type: 'ivf_pq', column: 'vector', num_partitions: 256, num_sub_vectors: 96 })
await table.createIndex({ type: 'ivf_pq', column: 'vector', num_partitions: 16, num_sub_vectors: 48 })
```
- **metric** (default: "L2"): The distance metric to use. By default it uses euclidean distance "`L2`".
@@ -221,4 +221,4 @@ On `SIFT-1M` dataset, our benchmark shows that keeping each partition 1K-4K rows
`num_sub_vectors` specifies how many Product Quantization (PQ) short codes to generate on each vector. Because
PQ is a lossy compression of the original vector, a higher `num_sub_vectors` usually results in
less space distortion, and thus yields better accuracy. However, a higher `num_sub_vectors` also causes heavier I/O and
more PQ computation, and thus, higher latency. `dimension / num_sub_vectors` should be a multiple of 8 for optimum SIMD efficiency.
more PQ computation, and thus, higher latency. `dimension / num_sub_vectors` should be a multiple of 8 for optimum SIMD efficiency.

View File

@@ -54,22 +54,18 @@
=== "Javascript"
```javascript
const lancedb = require("vectordb");
```typescript
--8<-- "src/basic_legacy.ts:import"
const uri = "data/sample-lancedb";
const db = await lancedb.connect(uri);
```
--8<-- "src/basic_legacy.ts:open_db"
```
=== "Rust"
```rust
use vectordb::connect;
#[tokio::main]
async fn main() -> Result<()> {
let uri = "data/sample-lancedb";
let db = connect(uri).await?;
--8<-- "src/basic.rs:connect"
}
```
@@ -105,11 +101,7 @@ If you need a reminder of the uri, you can call `db.uri()`.
=== "Javascript"
```javascript
const tbl = await db.createTable(
"myTable",
[{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
{"vector": [5.9, 26.5], "item": "bar", "price": 20.0}]
)
--8<-- "src/basic_legacy.ts:create_table"
```
If the table already exists, LanceDB will raise an error by default.
@@ -160,10 +152,7 @@ In this case, you can create an empty table and specify the schema.
=== "Javascript"
```typescript
import { Schema, Field, FixedSizeList, DataType } from "apache-arrow";
schema = new Schema([new new Field("vec", new FixedSizeList(2, new Field("item", new Float32())))])
tbl = await db.createTable({ name: "empty_table", schema: schema });
--8<-- "src/basic_legacy.ts:create_empty_table"
```
=== "Rust"
@@ -269,7 +258,7 @@ Once you've embedded the query, you can find its nearest neighbors using the fol
=== "Javascript"
```javascript
const query = await tbl.search([100, 100]).limit(2).execute();
--8<-- "src/basic_legacy.ts:search"
```
=== "Rust"
@@ -291,13 +280,13 @@ For tables with more than 50K vectors, creating an ANN index is recommended to s
=== "Python"
```python
```py
tbl.create_index()
```
=== "Javascript"
```javascript
```{.typescript .ignore}
await tbl.createIndex({})
```
@@ -324,7 +313,7 @@ This can delete any number of rows that match the filter.
=== "Javascript"
```javascript
await tbl.delete('item = "fizz"')
--8<-- "src/basic_legacy.ts:delete"
```
=== "Rust"
@@ -362,7 +351,7 @@ Use the `drop_table()` method on the database to remove a table.
=== "JavaScript"
```javascript
await db.dropTable('myTable')
--8<-- "src/basic_legacy.ts:drop_table"
```
This permanently removes the table and is not recoverable, unlike deleting rows.

1
docs/src/basic.rs Symbolic link
View File

@@ -0,0 +1 @@
../../rust/vectordb/examples/simple.rs

53
docs/src/basic_legacy.ts Normal file
View File

@@ -0,0 +1,53 @@
// --8<-- [start:import]
import * as lancedb from "vectordb";
import { Schema, Field, Float32, FixedSizeList, Int32 } from "apache-arrow";
// --8<-- [end:import]
import * as fs from "fs";
import { Table as ArrowTable, Utf8 } from "apache-arrow";
const example = async () => {
fs.rmSync("data/sample-lancedb", { recursive: true, force: true });
// --8<-- [start:open_db]
const uri = "data/sample-lancedb";
const db = await lancedb.connect(uri);
// --8<-- [end:open_db]
// --8<-- [start:create_table]
const tbl = await db.createTable(
"myTable",
[
{ vector: [3.1, 4.1], item: "foo", price: 10.0 },
{ vector: [5.9, 26.5], item: "bar", price: 20.0 },
],
{ writeMode: lancedb.WriteMode.Overwrite }
);
// --8<-- [end:create_table]
// --8<-- [start:create_empty_table]
const schema = new Schema([
new Field("id", new Int32()),
new Field("name", new Utf8()),
]);
const empty_tbl = await db.createTable({ name: "empty_table", schema });
// --8<-- [end:create_empty_table]
// --8<-- [start:search]
const query = await tbl.search([100, 100]).limit(2).execute();
// --8<-- [end:search]
console.log(query);
// --8<-- [start:delete]
await tbl.delete('item = "fizz"');
// --8<-- [end:delete]
// --8<-- [start:drop_table]
await db.dropTable("myTable");
// --8<-- [end:drop_table]
};
async function main() {
await example();
console.log("Basic example: done");
}
main();