From 242802ec7453a72dd5a7cb29d5247e874ae4249c Mon Sep 17 00:00:00 2001 From: Chang She <759245+changhiskhan@users.noreply.github.com> Date: Wed, 22 Mar 2023 16:13:48 -0700 Subject: [PATCH 1/5] hello world mkdocs --- .gitignore | 6 +++++- docs/{docs => }/index.md | 0 docs/mkdocs.yml => mkdocs.yml | 2 -- 3 files changed, 5 insertions(+), 3 deletions(-) rename docs/{docs => }/index.md (100%) rename docs/mkdocs.yml => mkdocs.yml (57%) diff --git a/.gitignore b/.gitignore index 82273582..a963eafe 100644 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,8 @@ **/__pycache__ rust/target -rust/Cargo.lock \ No newline at end of file +rust/Cargo.lock + +site + +.pytest_cache \ No newline at end of file diff --git a/docs/docs/index.md b/docs/index.md similarity index 100% rename from docs/docs/index.md rename to docs/index.md diff --git a/docs/mkdocs.yml b/mkdocs.yml similarity index 57% rename from docs/mkdocs.yml rename to mkdocs.yml index 70c3765a..f988ae82 100644 --- a/docs/mkdocs.yml +++ b/mkdocs.yml @@ -1,3 +1 @@ site_name: LanceDB Documentation -theme: - name: material From aeb3c52c8c2c481abf448031aae402c4e0beac92 Mon Sep 17 00:00:00 2001 From: Chang She <759245+changhiskhan@users.noreply.github.com> Date: Wed, 22 Mar 2023 17:45:51 -0700 Subject: [PATCH 2/5] restore material theme and add mkdocstring for autodoc etc --- .github/workflows/docs.yml | 36 ++++++++++++++++++++++++++++++++++++ docs/index.md | 1 + docs/python.md | 12 ++++++++++++ docs/requirements.txt | 3 +++ mkdocs.yml | 11 +++++++++++ 5 files changed, 63 insertions(+) create mode 100644 .github/workflows/docs.yml create mode 100644 docs/python.md create mode 100644 docs/requirements.txt diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 00000000..293ca5ad --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,36 @@ +name: Deploy docs to Pages + +on: + push: + branches: ["main"] + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +# Allow one concurrent deployment +concurrency: + group: "rtd" + cancel-in-progress: true + +jobs: + # Single deploy job since we're just deploying + build: + runs-on: ubuntu-22.04 + steps: + - name: Checkout + uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.10" + cache: 'pip' + cache-dependency-path: "docs/requirements.txt" + - name: Build Python + working-directory: python + run: | + python -m pip install -e . + python -m pip install -r ../docs/requirements.txt + - name: Build docs + working-directory: docs + run: | + mkdoc build diff --git a/docs/index.md b/docs/index.md index 7d10cdb8..fca36679 100644 --- a/docs/index.md +++ b/docs/index.md @@ -17,3 +17,4 @@ LanceDB's core is written in Rust 🦀 and is built using Lance, an open-source ## Documentation Quick Links * `Quick start` - search and filter a hello world vector dataset with LanceDB using the Python SDK. +* [`API Reference`](python.md) - detailed documentation for the LanceDB Python SDK. \ No newline at end of file diff --git a/docs/python.md b/docs/python.md new file mode 100644 index 00000000..c04c45a4 --- /dev/null +++ b/docs/python.md @@ -0,0 +1,12 @@ +# LanceDB Python API Reference + +## Installation + +```shell +pip install lancedb +``` + +::: lancedb +::: lancedb.db +::: lancedb.table +::: lancedb.query diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 00000000..1fbe6277 --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,3 @@ +mkdocs==1.4.2 +mkdocs-material==9.1.3 +mkdocstrings[python]==0.20.0 diff --git a/mkdocs.yml b/mkdocs.yml index f988ae82..d23cdb90 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -1 +1,12 @@ site_name: LanceDB Documentation + +theme: + name: "material" + +plugins: +- search +- mkdocstrings + +nav: +- Home: index.md +- Python API: python.md \ No newline at end of file From 01db9417fa89d3736f85911f0f2195e30563a802 Mon Sep 17 00:00:00 2001 From: Chang She <759245+changhiskhan@users.noreply.github.com> Date: Wed, 22 Mar 2023 17:59:15 -0700 Subject: [PATCH 3/5] add ruff and black pre-commit hook --- .pre-commit-config.yaml | 11 +++++++++++ python/pyproject.toml | 8 +++++++- 2 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..834faae6 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,11 @@ +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v3.2.0 + hooks: + - id: check-yaml + - id: end-of-file-fixer + - id: trailing-whitespace +- repo: https://github.com/psf/black + rev: 22.12.0 + hooks: + - id: black \ No newline at end of file diff --git a/python/pyproject.toml b/python/pyproject.toml index 06ead89e..b668fd60 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -37,7 +37,13 @@ repository = "https://github.com/eto-ai/lancedb" [project.optional-dependencies] tests = [ - "pytest", + "pytest" +] +dev = [ + "ruff", "pre-commit", "black" +] +docs = [ + "mkdocs", "mkdocs-material", "mkdocstrings[python]" ] [build-system] From 1f42104c779858e2fc0e7b58f2a138914350531c Mon Sep 17 00:00:00 2001 From: Chang She <759245+changhiskhan@users.noreply.github.com> Date: Wed, 22 Mar 2023 18:21:11 -0700 Subject: [PATCH 4/5] deploy to github pages --- .github/workflows/docs.yml | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 293ca5ad..2cb88be2 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -7,14 +7,23 @@ on: # Allows you to run this workflow manually from the Actions tab workflow_dispatch: +# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages +permissions: + contents: read + pages: write + id-token: write + # Allow one concurrent deployment concurrency: - group: "rtd" + group: "pages" cancel-in-progress: true jobs: # Single deploy job since we're just deploying build: + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} runs-on: ubuntu-22.04 steps: - name: Checkout @@ -23,7 +32,7 @@ jobs: uses: actions/setup-python@v4 with: python-version: "3.10" - cache: 'pip' + cache: "pip" cache-dependency-path: "docs/requirements.txt" - name: Build Python working-directory: python @@ -34,3 +43,12 @@ jobs: working-directory: docs run: | mkdoc build + - name: Setup Pages + uses: actions/configure-pages@v2 + - name: Upload artifact + uses: actions/upload-pages-artifact@v1 + with: + path: "site" + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v1 From 5ef51418123c997e8375fa464dfe3cf6ee33dd88 Mon Sep 17 00:00:00 2001 From: Chang She <759245+changhiskhan@users.noreply.github.com> Date: Wed, 22 Mar 2023 18:29:07 -0700 Subject: [PATCH 5/5] black --- python/lancedb/db.py | 5 ++-- python/lancedb/query.py | 9 ++----- python/lancedb/table.py | 5 ++-- python/tests/test_db.py | 10 +++++--- python/tests/test_query.py | 29 ++++++++++++----------- python/tests/test_table.py | 48 +++++++++++++++++++++++++------------- 6 files changed, 63 insertions(+), 43 deletions(-) diff --git a/python/lancedb/db.py b/python/lancedb/db.py index e0ab2fea..3db1c583 100644 --- a/python/lancedb/db.py +++ b/python/lancedb/db.py @@ -53,8 +53,9 @@ class LanceDBConnection: def __getitem__(self, name: str) -> LanceTable: return self.open_table(name) - def create_table(self, name: str, data: DATA = None, - schema: pa.Schema = None) -> LanceTable: + def create_table( + self, name: str, data: DATA = None, schema: pa.Schema = None + ) -> LanceTable: """Create a table in the database. Parameters diff --git a/python/lancedb/query.py b/python/lancedb/query.py index 07c022f8..14ac2083 100644 --- a/python/lancedb/query.py +++ b/python/lancedb/query.py @@ -76,17 +76,12 @@ class LanceQueryBuilder: return self def to_df(self) -> pd.DataFrame: - """Execute the query and return the results as a pandas DataFrame. - """ + """Execute the query and return the results as a pandas DataFrame.""" ds = self._table.to_lance() # TODO indexed search tbl = ds.to_table( columns=self._columns, filter=self._where, - nearest={ - "column": VECTOR_COLUMN_NAME, - "q": self._query, - "k": self._limit - } + nearest={"column": VECTOR_COLUMN_NAME, "q": self._query, "k": self._limit}, ) return tbl.to_pandas() diff --git a/python/lancedb/table.py b/python/lancedb/table.py index ce0d5fb5..7840f396 100644 --- a/python/lancedb/table.py +++ b/python/lancedb/table.py @@ -131,8 +131,9 @@ def _sanitize_schema(data: pa.Table, schema: pa.Schema = None) -> pa.Table: return data # cast the columns to the expected types data = data.combine_chunks() - return pa.Table.from_arrays([data[name] for name in schema.names], - schema=schema) + return pa.Table.from_arrays( + [data[name] for name in schema.names], schema=schema + ) # just check the vector column return _sanitize_vector_column(data, vector_column_name=VECTOR_COLUMN_NAME) diff --git a/python/tests/test_db.py b/python/tests/test_db.py index 7ce51e71..956ce505 100644 --- a/python/tests/test_db.py +++ b/python/tests/test_db.py @@ -20,9 +20,13 @@ def test_basic(tmp_path): assert db.uri == str(tmp_path) assert db.table_names() == [] - table = db.create_table("test", - data=[{"vector": [3.1, 4.1], "item": "foo", "price": 10.0}, - {"vector": [5.9, 26.5], "item": "bar", "price": 20.0}]) + table = db.create_table( + "test", + data=[ + {"vector": [3.1, 4.1], "item": "foo", "price": 10.0}, + {"vector": [5.9, 26.5], "item": "bar", "price": 20.0}, + ], + ) rs = table.search([100, 100]).limit(1).to_df() assert len(rs) == 1 assert rs["item"].iloc[0] == "bar" diff --git a/python/tests/test_query.py b/python/tests/test_query.py index 692debcc..c08cdd8f 100644 --- a/python/tests/test_query.py +++ b/python/tests/test_query.py @@ -21,7 +21,6 @@ import pytest class MockTable: - def __init__(self, tmp_path): self.uri = tmp_path @@ -31,16 +30,22 @@ class MockTable: @pytest.fixture def table(tmp_path) -> MockTable: - df = pd.DataFrame({ - "vector": [[1, 2], [3, 4]], - "id": [1, 2], - "str_field": ["a", "b"], - "float_field": [1.0, 2.0] - }) - schema = pa.schema([pa.field("vector", pa.list_(pa.float32(), list_size=2)), - pa.field("id", pa.int32()), - pa.field("str_field", pa.string()), - pa.field("float_field", pa.float64())]) + df = pd.DataFrame( + { + "vector": [[1, 2], [3, 4]], + "id": [1, 2], + "str_field": ["a", "b"], + "float_field": [1.0, 2.0], + } + ) + schema = pa.schema( + [ + pa.field("vector", pa.list_(pa.float32(), list_size=2)), + pa.field("id", pa.int32()), + pa.field("str_field", pa.string()), + pa.field("float_field", pa.float64()), + ] + ) lance.write_dataset(df, tmp_path, schema) return MockTable(tmp_path) @@ -55,5 +60,3 @@ def test_query_builder_with_filter(table): df = LanceQueryBuilder(table, [0, 0]).where("id = 2").to_df() assert df["id"].values[0] == 2 assert all(df["vector"].values[0] == [3, 4]) - - diff --git a/python/tests/test_table.py b/python/tests/test_table.py index 3050d69b..e0a93f06 100644 --- a/python/tests/test_table.py +++ b/python/tests/test_table.py @@ -21,7 +21,6 @@ from lancedb.table import LanceTable class MockDB: - def __init__(self, uri: Path): self.uri = uri @@ -33,9 +32,12 @@ def db(tmp_path) -> MockDB: def test_basic(db): ds = LanceTable.create( - db, "test", - data=[{"vector": [3.1, 4.1], "item": "foo", "price": 10.0}, - {"vector": [5.9, 26.5], "item": "bar", "price": 20.0}] + db, + "test", + data=[ + {"vector": [3.1, 4.1], "item": "foo", "price": 10.0}, + {"vector": [5.9, 26.5], "item": "bar", "price": 20.0}, + ], ).to_lance() table = LanceTable(db, "test") @@ -45,21 +47,35 @@ def test_basic(db): def test_add(db): - schema = pa.schema([pa.field("vector", pa.list_(pa.float32())), - pa.field("item", pa.string()), - pa.field("price", pa.float32())]) - expected = pa.Table.from_arrays([ - pa.array([[3.1, 4.1], [5.9, 26.5]]), - pa.array(["foo", "bar"]), - pa.array([10.0, 20.0]) - ], schema=schema) - data = [[{"vector": [3.1, 4.1], "item": "foo", "price": 10.0}, - {"vector": [5.9, 26.5], "item": "bar", "price": 20.0}]] + schema = pa.schema( + [ + pa.field("vector", pa.list_(pa.float32())), + pa.field("item", pa.string()), + pa.field("price", pa.float32()), + ] + ) + expected = pa.Table.from_arrays( + [ + pa.array([[3.1, 4.1], [5.9, 26.5]]), + pa.array(["foo", "bar"]), + pa.array([10.0, 20.0]), + ], + schema=schema, + ) + data = [ + [ + {"vector": [3.1, 4.1], "item": "foo", "price": 10.0}, + {"vector": [5.9, 26.5], "item": "bar", "price": 20.0}, + ] + ] df = pd.DataFrame(data[0]) data.append(df) data.append(pa.Table.from_pandas(df, schema=schema)) for i, d in enumerate(data): - tbl = (LanceTable.create(db, f"test_{i}", data=d, schema=schema) - .to_lance().to_table()) + tbl = ( + LanceTable.create(db, f"test_{i}", data=d, schema=schema) + .to_lance() + .to_table() + ) assert expected == tbl