From e7fdb931deb6883e6e81ace388b01852ce56cdcc Mon Sep 17 00:00:00 2001 From: Lei Xu Date: Sun, 28 Jan 2024 11:39:25 -0800 Subject: [PATCH] chore: convert all js doc test to use snippet. (#881) --- .github/workflows/docs_test.yml | 24 +------- docs/package.json | 4 +- docs/src/ann_indexes.md | 74 ++++++++++++------------- docs/src/ann_indexes.ts | 53 ++++++++++++++++++ docs/src/search.md | 67 ++++++++-------------- docs/src/search_legacy.ts | 41 ++++++++++++++ docs/src/sql.md | 98 ++++++++++++++++----------------- docs/src/sql_legacy.ts | 38 +++++++++++++ docs/test/md_testing.js | 60 -------------------- docs/test/package.json | 13 ----- node/src/index.ts | 2 + node/src/remote/index.ts | 4 ++ 12 files changed, 246 insertions(+), 232 deletions(-) create mode 100644 docs/src/ann_indexes.ts create mode 100644 docs/src/search_legacy.ts create mode 100644 docs/src/sql_legacy.ts delete mode 100644 docs/test/md_testing.js delete mode 100644 docs/test/package.json diff --git a/.github/workflows/docs_test.yml b/.github/workflows/docs_test.yml index 22cb6007..1132bd74 100644 --- a/.github/workflows/docs_test.yml +++ b/.github/workflows/docs_test.yml @@ -68,31 +68,13 @@ jobs: run: | cd node npm ci - npm run build + npm run build-release cd ../docs npm install - - name: Run doc test - run: | - cd docs - npm t - - name: Install dependencies for generated code - run: | - cd docs/test - npm install - - name: Install LanceDB - run: | - cd docs/test/node_modules/vectordb - npm ci - npm run build-release - npm run tsc - - name: Create test files - run: | - cd docs/test - node md_testing.js - name: Test env: LANCEDB_URI: ${{ secrets.LANCEDB_URI }} LANCEDB_DEV_API_KEY: ${{ secrets.LANCEDB_DEV_API_KEY }} run: | - cd docs/test/node - for d in *; do cd "$d"; echo "$d".js; node "$d".js; cd ..; done + cd docs + npm t diff --git a/docs/package.json b/docs/package.json index 792cdc89..041e5524 100644 --- a/docs/package.json +++ b/docs/package.json @@ -9,9 +9,9 @@ "vectordb": "file:../node" }, 
"scripts": { - "build": "tsc -b && cd ../node && npm run build", + "build": "tsc -b && cd ../node && npm run build-release", "example": "npm run build && node", - "test": "npm run build && node $(ls dist/*.js)" + "test": "npm run build && ls dist/*.js | xargs -n 1 node" }, "devDependencies": { "@types/node": "^20.11.8", diff --git a/docs/src/ann_indexes.md b/docs/src/ann_indexes.md index 230e5a3e..bc745b75 100644 --- a/docs/src/ann_indexes.md +++ b/docs/src/ann_indexes.md @@ -7,7 +7,7 @@ for brute-force scanning of the entire vector space. A vector index is faster but less accurate than exhaustive search (kNN or flat search). LanceDB provides many parameters to fine-tune the index's size, the speed of queries, and the accuracy of results. -Currently, LanceDB does *not* automatically create the ANN index. +Currently, LanceDB does _not_ automatically create the ANN index. LanceDB has optimized code for kNN as well. For many use-cases, datasets under 100K vectors won't require index creation at all. If you can live with <100ms latency, skipping index creation is a simpler workflow while guaranteeing 100% recall. @@ -17,16 +17,17 @@ In the future we will look to automatically create and configure the ANN index a Lance can support multiple index types, the most widely used one is `IVF_PQ`. -* `IVF_PQ`: use **Inverted File Index (IVF)** to first divide the dataset into `N` partitions, - and then use **Product Quantization** to compress vectors in each partition. -* `DiskANN` (**Experimental**): organize the vector as a on-disk graph, where the vertices approximately - represent the nearest neighbors of each vector. +- `IVF_PQ`: use **Inverted File Index (IVF)** to first divide the dataset into `N` partitions, + and then use **Product Quantization** to compress vectors in each partition. +- `DiskANN` (**Experimental**): organize the vector as an on-disk graph, where the vertices approximately + represent the nearest neighbors of each vector. 
## Creating an IVF_PQ Index Lance supports `IVF_PQ` index type by default. === "Python" + Creating indexes is done via the [create_index](https://lancedb.github.io/lancedb/python/#lancedb.table.LanceTable.create_index) method. ```python @@ -47,24 +48,19 @@ Lance supports `IVF_PQ` index type by default. ``` === "Javascript" - ```javascript - const vectordb = require('vectordb') - const db = await vectordb.connect('data/sample-lancedb') - let data = [] - for (let i = 0; i < 10_000; i++) { - data.push({vector: Array(1536).fill(i), id: `${i}`, content: "", longId: `${i}`},) - } - const table = await db.createTable('my_vectors', data) - await table.createIndex({ type: 'ivf_pq', column: 'vector', num_partitions: 16, num_sub_vectors: 48 }) + ```javascript + --8<--- "src/ann_indexes.ts:import" + + --8<-- "src/ann_indexes.ts:ingest" ``` - **metric** (default: "L2"): The distance metric to use. By default it uses euclidean distance "`L2`". -We also support "cosine" and "dot" distance as well. + We also support "cosine" and "dot" distance as well. - **num_partitions** (default: 256): The number of partitions of the index. - **num_sub_vectors** (default: 96): The number of sub-vectors (M) that will be created during Product Quantization (PQ). -For D dimensional vector, it will be divided into `M` of `D/M` sub-vectors, each of which is presented by -a single PQ code. + For D dimensional vector, it will be divided into `M` of `D/M` sub-vectors, each of which is presented by + a single PQ code.
![IVF PQ](./assets/ivf_pq.png) @@ -78,7 +74,7 @@ Using GPU for index creation requires [PyTorch>2.0](https://pytorch.org/) being You can specify the GPU device to train IVF partitions via -- **accelerator**: Specify to ``cuda`` or ``mps`` (on Apple Silicon) to enable GPU training. +- **accelerator**: Specify to `cuda` or `mps` (on Apple Silicon) to enable GPU training. === "Linux" @@ -106,10 +102,9 @@ You can specify the GPU device to train IVF partitions via Trouble shootings: -If you see ``AssertionError: Torch not compiled with CUDA enabled``, you need to [install +If you see `AssertionError: Torch not compiled with CUDA enabled`, you need to [install PyTorch with CUDA support](https://pytorch.org/get-started/locally/). - ## Querying an ANN Index Querying vector indexes is done via the [search](https://lancedb.github.io/lancedb/python/#lancedb.table.LanceTable.search) function. @@ -127,6 +122,7 @@ There are a couple of parameters that can be used to fine-tune the search: Note: refine_factor is only applicable if an ANN index is present. If specified on a table without an ANN index, it is ignored. === "Python" + ```python tbl.search(np.random.random((1536))) \ .limit(2) \ @@ -134,41 +130,35 @@ There are a couple of parameters that can be used to fine-tune the search: .refine_factor(10) \ .to_pandas() ``` - ``` + + ```text vector item _distance 0 [0.44949695, 0.8444449, 0.06281311, 0.23338133... item 1141 103.575333 1 [0.48587373, 0.269207, 0.15095535, 0.65531915,... item 3953 108.393867 ``` === "Javascript" + ```javascript - const results_1 = await table - .search(Array(1536).fill(1.2)) - .limit(2) - .nprobes(20) - .refineFactor(10) - .execute() + --8<-- "src/ann_indexes.ts:search1" ``` The search will return the data requested in addition to the distance of each item. - ### Filtering (where clause) You can further filter the elements returned by a search using a where clause. 
=== "Python" + ```python tbl.search(np.random.random((1536))).where("item != 'item 1141'").to_pandas() ``` === "Javascript" + ```javascript - const results_2 = await table - .search(Array(1536).fill(1.2)) - .where("id != '1141'") - .limit(2) - .execute() + --8<-- "src/ann_indexes.ts:search2" ``` ### Projections (select clause) @@ -176,23 +166,23 @@ You can further filter the elements returned by a search using a where clause. You can select the columns returned by the query using a select clause. === "Python" + ```python tbl.search(np.random.random((1536))).select(["vector"]).to_pandas() ``` - ``` - vector _distance + + + ```text + vector _distance 0 [0.30928212, 0.022668175, 0.1756372, 0.4911822... 93.971092 1 [0.2525465, 0.01723831, 0.261568, 0.002007689,... 95.173485 ... ``` === "Javascript" + ```javascript - const results_3 = await table - .search(Array(1536).fill(1.2)) - .select(["id"]) - .limit(2) - .execute() + --8<-- "src/ann_indexes.ts:search3" ``` ## FAQ @@ -222,3 +212,7 @@ On `SIFT-1M` dataset, our benchmark shows that keeping each partition 1K-4K rows PQ is a lossy compression of the original vector, a higher `num_sub_vectors` usually results in less space distortion, and thus yields better accuracy. However, a higher `num_sub_vectors` also causes heavier I/O and more PQ computation, and thus, higher latency. `dimension / num_sub_vectors` should be a multiple of 8 for optimum SIMD efficiency. 
+ +``` + +``` diff --git a/docs/src/ann_indexes.ts b/docs/src/ann_indexes.ts new file mode 100644 index 00000000..b6bafb8c --- /dev/null +++ b/docs/src/ann_indexes.ts @@ -0,0 +1,53 @@ +// --8<-- [start:import] +import * as vectordb from "vectordb"; +// --8<-- [end:import] + +(async () => { + // --8<-- [start:ingest] + const db = await vectordb.connect("data/sample-lancedb"); + + let data = []; + for (let i = 0; i < 10_000; i++) { + data.push({ + vector: Array(1536).fill(i), + id: `${i}`, + content: "", + longId: `${i}`, + }); + } + const table = await db.createTable("my_vectors", data); + await table.createIndex({ + type: "ivf_pq", + column: "vector", + num_partitions: 16, + num_sub_vectors: 48, + }); + // --8<-- [end:ingest] + + // --8<-- [start:search1] + const results_1 = await table + .search(Array(1536).fill(1.2)) + .limit(2) + .nprobes(20) + .refineFactor(10) + .execute(); + // --8<-- [end:search1] + + // --8<-- [start:search2] + const results_2 = await table + .search(Array(1536).fill(1.2)) + .where("id != '1141'") + .limit(2) + .execute(); + // --8<-- [end:search2] + + // --8<-- [start:search3] + const results_3 = await table + .search(Array(1536).fill(1.2)) + .select(["id"]) + .limit(2) + .execute(); + // --8<-- [end:search3] + + console.log("Ann indexes: done"); +})(); diff --git a/docs/src/search.md b/docs/src/search.md index e80d4ffd..ac4613f1 100644 --- a/docs/src/search.md +++ b/docs/src/search.md @@ -2,27 +2,26 @@ A vector search finds the approximate or exact nearest neighbors to a given query vector. -* In a recommendation system or search engine, you can find similar records to -the one you searched. -* In LLM and other AI applications, -each data point can be represented by [embeddings generated from existing models](embeddings/index.md), -following which the search returns the most relevant features. +- In a recommendation system or search engine, you can find similar records to + the one you searched. 
+- In LLM and other AI applications, + each data point can be represented by [embeddings generated from existing models](embeddings/index.md), + following which the search returns the most relevant features. ## Distance metrics Distance metrics are a measure of the similarity between a pair of vectors. Currently, LanceDB supports the following metrics: -| Metric | Description | -| ----------- | ------------------------------------ | -| `l2` | [Euclidean / L2 distance](https://en.wikipedia.org/wiki/Euclidean_distance) | -| `cosine` | [Cosine Similarity](https://en.wikipedia.org/wiki/Cosine_similarity)| -| `dot` | [Dot Production](https://en.wikipedia.org/wiki/Dot_product) | - +| Metric | Description | +| -------- | --------------------------------------------------------------------------- | +| `l2` | [Euclidean / L2 distance](https://en.wikipedia.org/wiki/Euclidean_distance) | +| `cosine` | [Cosine Similarity](https://en.wikipedia.org/wiki/Cosine_similarity) | +| `dot` | [Dot Product](https://en.wikipedia.org/wiki/Dot_product) | ## Exhaustive search (kNN) -If you do not create a vector index, LanceDB exhaustively scans the *entire* vector space +If you do not create a vector index, LanceDB exhaustively scans the _entire_ vector space and compute the distance to every vector in order to find the exact nearest neighbors. This is effectively a kNN search. - === "Python" - ```python import lancedb import numpy as np @@ -70,17 +56,12 @@ await db_setup.createTable('my_vectors', data) === "JavaScript" ```javascript - const vectordb = require('vectordb') - const db = await vectordb.connect('data/sample-lancedb') + --8<-- "src/search_legacy.ts:import" - const tbl = await db.openTable("my_vectors") - - const results_1 = await tbl.search(Array(1536).fill(1.2)) - .limit(10) - .execute() + --8<-- "src/search_legacy.ts:search1" ``` -By default, `l2` will be used as metric type. You can specify the metric type as +By default, `l2` will be used as metric type. 
You can specify the metric type as `cosine` or `dot` if required. === "Python" @@ -92,20 +73,16 @@ By default, `l2` will be used as metric type. You can specify the metric type as .to_list() ``` - === "JavaScript" ```javascript - const results_2 = await tbl.search(Array(1536).fill(1.2)) - .metricType("cosine") - .limit(10) - .execute() + --8<-- "src/search_legacy.ts:search2" ``` ## Approximate nearest neighbor (ANN) search To perform scalable vector retrieval with acceptable latencies, it's common to build a vector index. -While the exhaustive search is guaranteed to always return 100% recall, the approximate nature of +While the exhaustive search is guaranteed to always return 100% recall, the approximate nature of an ANN search means that using an index often involves a trade-off between recall and latency. See the [IVF_PQ index](./concepts/index_ivfpq.md.md) for a deeper description of how `IVF_PQ` @@ -117,7 +94,9 @@ LanceDB returns vector search results via different formats commonly used in pyt Let's create a LanceDB table with a nested schema: === "Python" + ```python + from datetime import datetime import lancedb from lancedb.pydantic import LanceModel, Vector @@ -153,7 +132,7 @@ Let's create a LanceDB table with a nested schema: ### As a PyArrow table Using `to_arrow()` we can get the results back as a pyarrow Table. - This result table has the same columns as the LanceDB table, with + This result table has the same columns as the LanceDB table, with the addition of an `_distance` column for vector search or a `score` column for full text search. 
@@ -169,11 +148,11 @@ Let's create a LanceDB table with a nested schema: tbl.search(np.random.randn(1536)).to_pandas() ``` - While other formats like Arrow/Pydantic/Python dicts have a natural - way to handle nested schemas, pandas can only store nested data as a + While other formats like Arrow/Pydantic/Python dicts have a natural + way to handle nested schemas, pandas can only store nested data as a python dict column, which makes it difficult to support nested references. - So for convenience, you can also tell LanceDB to flatten a nested schema - when creating the pandas dataframe. + So for convenience, you can also tell LanceDB to flatten a nested schema + when creating the pandas dataframe. ```python tbl.search(np.random.randn(1536)).to_pandas(flatten=True) diff --git a/docs/src/search_legacy.ts b/docs/src/search_legacy.ts new file mode 100644 index 00000000..dab567e3 --- /dev/null +++ b/docs/src/search_legacy.ts @@ -0,0 +1,41 @@ +// --8<-- [start:import] +import * as lancedb from "vectordb"; +// --8<-- [end:import] +import * as fs from "fs"; + +async function setup() { + fs.rmSync("data/sample-lancedb", { recursive: true, force: true }); + const db = await lancedb.connect("data/sample-lancedb"); + + let data = []; + for (let i = 0; i < 10_000; i++) { + data.push({ + vector: Array(1536).fill(i), + id: `${i}`, + content: "", + longId: `${i}`, + }); + } + await db.createTable("my_vectors", data); +} + +(async () => { + await setup(); + + // --8<-- [start:search1] + const db = await lancedb.connect("data/sample-lancedb"); + const tbl = await db.openTable("my_vectors"); + + const results_1 = await tbl.search(Array(1536).fill(1.2)).limit(10).execute(); + // --8<-- [end:search1] + + // --8<-- [start:search2] + const results_2 = await tbl + .search(Array(1536).fill(1.2)) + .metricType(lancedb.MetricType.Cosine) + .limit(10) + .execute(); + // --8<-- [end:search2] + + console.log("search: done"); +})(); diff --git a/docs/src/sql.md b/docs/src/sql.md index 
0f7f91f3..c8847529 100644 --- a/docs/src/sql.md +++ b/docs/src/sql.md @@ -8,7 +8,7 @@ option that performs the filter prior to vector search. This can be useful to na the search space on a very large dataset to reduce query latency. === "Python" + ```py result = ( tbl.search([0.5, 0.2]) @@ -44,12 +45,9 @@ const tbl = await db.createTable('myVectors', data) ``` === "JavaScript" + ```javascript - let result = await tbl.search(Array(1536).fill(0.5)) - .limit(1) - .filter("id = 10") - .prefilter(true) - .execute() + --8<-- "src/sql_legacy.ts:search" ``` ## SQL filters @@ -60,14 +58,14 @@ It can be used during vector search, update, and deletion operations. Currently, Lance supports a growing list of SQL expressions. -* ``>``, ``>=``, ``<``, ``<=``, ``=`` -* ``AND``, ``OR``, ``NOT`` -* ``IS NULL``, ``IS NOT NULL`` -* ``IS TRUE``, ``IS NOT TRUE``, ``IS FALSE``, ``IS NOT FALSE`` -* ``IN`` -* ``LIKE``, ``NOT LIKE`` -* ``CAST`` -* ``regexp_match(column, pattern)`` +- `>`, `>=`, `<`, `<=`, `=` +- `AND`, `OR`, `NOT` +- `IS NULL`, `IS NOT NULL` +- `IS TRUE`, `IS NOT TRUE`, `IS FALSE`, `IS NOT FALSE` +- `IN` +- `LIKE`, `NOT LIKE` +- `CAST` +- `regexp_match(column, pattern)` For example, the following filter string is acceptable: @@ -82,29 +80,27 @@ For example, the following filter string is acceptable: === "Javascript" ```javascript - await tbl.search(Array(1536).fill(0)) - .where("(item IN ('item 0', 'item 2')) AND (id > 10)") - .execute() + --8<-- "src/sql_legacy.ts:vec_search" ``` - If your column name contains special characters or is a [SQL Keyword](https://docs.rs/sqlparser/latest/sqlparser/keywords/index.html), you can use backtick (`` ` ``) to escape it. For nested fields, each segment of the path must be wrapped in backticks. === "SQL" + ```sql `CUBE` = 10 AND `column name with space` IS NOT NULL AND `nested with space`.`inner with space` < 2 ``` -!!! warning - Field names containing periods (``.``) are not supported. 
+!!!warning "Field names containing periods (`.`) are not supported." Literals for dates, timestamps, and decimals can be written by writing the string value after the type name. For example === "SQL" + ```sql date_col = date '2021-01-01' and timestamp_col = timestamp '2021-01-01 00:00:00' @@ -114,49 +110,47 @@ value after the type name. For example For timestamp columns, the precision can be specified as a number in the type parameter. Microsecond precision (6) is the default. -| SQL | Time unit | -|------------------|--------------| -| ``timestamp(0)`` | Seconds | -| ``timestamp(3)`` | Milliseconds | -| ``timestamp(6)`` | Microseconds | -| ``timestamp(9)`` | Nanoseconds | +| SQL | Time unit | +| -------------- | ------------ | +| `timestamp(0)` | Seconds | +| `timestamp(3)` | Milliseconds | +| `timestamp(6)` | Microseconds | +| `timestamp(9)` | Nanoseconds | LanceDB internally stores data in [Apache Arrow](https://arrow.apache.org/) format. The mapping from SQL types to Arrow types is: -| SQL type | Arrow type | -|----------|------------| -| ``boolean`` | ``Boolean`` | -| ``tinyint`` / ``tinyint unsigned`` | ``Int8`` / ``UInt8`` | -| ``smallint`` / ``smallint unsigned`` | ``Int16`` / ``UInt16`` | -| ``int`` or ``integer`` / ``int unsigned`` or ``integer unsigned`` | ``Int32`` / ``UInt32`` | -| ``bigint`` / ``bigint unsigned`` | ``Int64`` / ``UInt64`` | -| ``float`` | ``Float32`` | -| ``double`` | ``Float64`` | -| ``decimal(precision, scale)`` | ``Decimal128`` | -| ``date`` | ``Date32`` | -| ``timestamp`` | ``Timestamp`` [^1] | -| ``string`` | ``Utf8`` | -| ``binary`` | ``Binary`` | +| SQL type | Arrow type | +| --------------------------------------------------------- | ------------------ | +| `boolean` | `Boolean` | +| `tinyint` / `tinyint unsigned` | `Int8` / `UInt8` | +| `smallint` / `smallint unsigned` | `Int16` / `UInt16` | +| `int` or `integer` / `int unsigned` or `integer unsigned` | `Int32` / `UInt32` | +| `bigint` / `bigint unsigned` | `Int64` / `UInt64` 
| +| `float` | `Float32` | +| `double` | `Float64` | +| `decimal(precision, scale)` | `Decimal128` | +| `date` | `Date32` | +| `timestamp` | `Timestamp` [^1] | +| `string` | `Utf8` | +| `binary` | `Binary` | [^1]: See precision mapping in previous table. - ## Filtering without Vector Search You can also filter your data without search. === "Python" - ```python - tbl.search().where("id = 10").limit(10).to_arrow() - ``` + + ```python + tbl.search().where("id = 10").limit(10).to_arrow() + ``` === "JavaScript" - ```javascript - await tbl.where('id = 10').limit(10).execute() - ``` -!!! warning - If your table is large, this could potentially return a very large - amount of data. Please be sure to use a `limit` clause unless - you're sure you want to return the whole result set. + ```javascript + --8<---- "src/sql_legacy.ts:sql_search" + ``` + +!!!warning "If your table is large, this could potentially return a very large amount of data. Please be sure to use a `limit` clause unless you're sure you want to return the whole result set." 
diff --git a/docs/src/sql_legacy.ts b/docs/src/sql_legacy.ts new file mode 100644 index 00000000..76366481 --- /dev/null +++ b/docs/src/sql_legacy.ts @@ -0,0 +1,38 @@ +import * as vectordb from "vectordb"; + +(async () => { + const db = await vectordb.connect("data/sample-lancedb"); + + let data = []; + for (let i = 0; i < 10_000; i++) { + data.push({ + vector: Array(1536).fill(i), + id: i, + item: `item ${i}`, + strId: `${i}`, + }); + } + const tbl = await db.createTable("myVectors", data); + + // --8<-- [start:search] + let result = await tbl + .search(Array(1536).fill(0.5)) + .limit(1) + .filter("id = 10") + .prefilter(true) + .execute(); + // --8<-- [end:search] + + // --8<-- [start:vec_search] + await tbl + .search(Array(1536).fill(0)) + .where("(item IN ('item 0', 'item 2')) AND (id > 10)") + .execute(); + // --8<-- [end:vec_search] + + // --8<-- [start:sql_search] + await tbl.filter("id = 10").limit(10).execute(); + // --8<-- [end:sql_search] + + console.log("SQL search: done"); +})(); diff --git a/docs/test/md_testing.js b/docs/test/md_testing.js deleted file mode 100644 index fae34fb6..00000000 --- a/docs/test/md_testing.js +++ /dev/null @@ -1,60 +0,0 @@ -const glob = require("glob"); -const fs = require("fs"); -const path = require("path"); - -const globString = "../src/**/*.md"; - -const excludedGlobs = [ - "../src/fts.md", - "../src/embedding.md", - "../src/examples/*.md", - "../src/guides/tables.md", - "../src/guides/storage.md", - "../src/embeddings/*.md", - "../src/javascript/**/*.md", - "../src/basic.md", -]; - -const nodePrefix = "javascript"; -const nodeFile = ".js"; -const nodeFolder = "node"; -const asyncPrefix = "(async () => {\n"; -const asyncSuffix = "})();"; - -function* yieldLines(lines, prefix, suffix) { - let inCodeBlock = false; - for (const line of lines) { - if (line.trim().startsWith(prefix + nodePrefix)) { - inCodeBlock = true; - } else if (inCodeBlock && line.trim().startsWith(suffix)) { - inCodeBlock = false; - yield "\n"; - } else 
if (inCodeBlock) { - yield line; - } - } -} - -const files = glob.sync(globString, { recursive: true }); -const excludedFiles = glob.sync(excludedGlobs, { recursive: true }); - -for (const file of files.filter((file) => !excludedFiles.includes(file))) { - const lines = []; - const data = fs.readFileSync(file, "utf-8"); - const fileLines = data.split("\n"); - - for (const line of yieldLines(fileLines, "```", "```")) { - lines.push(line); - } - - if (lines.length > 0) { - const fileName = path.basename(file, ".md"); - const outPath = path.join(nodeFolder, fileName, `${fileName}${nodeFile}`); - console.log(outPath); - fs.mkdirSync(path.dirname(outPath), { recursive: true }); - fs.writeFileSync( - outPath, - asyncPrefix + "\n" + lines.join("\n") + asyncSuffix - ); - } -} diff --git a/docs/test/package.json b/docs/test/package.json deleted file mode 100644 index 37c676c8..00000000 --- a/docs/test/package.json +++ /dev/null @@ -1,13 +0,0 @@ -{ - "name": "lancedb-docs-test", - "version": "1.0.0", - "description": "", - "author": "", - "license": "ISC", - "dependencies": { - "fs": "^0.0.1-security", - "glob": "^10.2.7", - "path": "^0.12.7", - "vectordb": "https://gitpkg.now.sh/lancedb/lancedb/node?main" - } -} diff --git a/node/src/index.ts b/node/src/index.ts index 50661d9e..bb2af069 100644 --- a/node/src/index.ts +++ b/node/src/index.ts @@ -443,6 +443,8 @@ export interface Table { */ indexStats: (indexUuid: string) => Promise + filter (value: string): Query + schema: Promise } diff --git a/node/src/remote/index.ts b/node/src/remote/index.ts index e49ae3be..b08d9e6c 100644 --- a/node/src/remote/index.ts +++ b/node/src/remote/index.ts @@ -270,6 +270,10 @@ export class RemoteTable implements Table { return new RemoteQuery(query, this._client, this._name) //, this._embeddings_new) } + filter (where: string): Query { + throw new Error('Not implemented') + } + async add (data: Array> | ArrowTable): Promise { let tbl: ArrowTable if (data instanceof ArrowTable) {