Merge pull request #8 from lancedb/changhiskhan/mkdocs

hello world mkdocs
2026-01-15 00:02:59 +00:00 · 2023-03-22 18:45:54 -07:00
parent e2d9bc8c78 5ef5141812
commit 5b47fad0eb
15 changed files with 168 additions and 48 deletions
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -0,0 +1,54 @@
+name: Deploy docs to Pages
+
+on:
+  push:
+    branches: ["main"]
+
+  # Allows you to run this workflow manually from the Actions tab
+  workflow_dispatch:
+
+# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
+permissions:
+  contents: read
+  pages: write
+  id-token: write
+
+# Allow one concurrent deployment
+concurrency:
+  group: "pages"
+  cancel-in-progress: true
+
+jobs:
+  # Single deploy job since we're just deploying
+  build:
+    environment:
+      name: github-pages
+      url: ${{ steps.deployment.outputs.page_url }}
+    runs-on: ubuntu-22.04
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.10"
+          cache: "pip"
+          cache-dependency-path: "docs/requirements.txt"
+      - name: Build Python
+        working-directory: python
+        run: |
+          python -m pip install -e .
+          python -m pip install -r ../docs/requirements.txt
+      - name: Build docs  # renders the static site with the MkDocs CLI installed from docs/requirements.txt
+        working-directory: docs  # NOTE(review): this commit adds mkdocs.yml at the repo root; confirm mkdocs can locate its config from this directory
+        run: |
+          mkdocs build
+      - name: Setup Pages
+        uses: actions/configure-pages@v2
+      - name: Upload artifact
+        uses: actions/upload-pages-artifact@v1
+        with:
+          path: "site"
+      - name: Deploy to GitHub Pages
+        id: deployment
+        uses: actions/deploy-pages@v1
--- a/.gitignore
+++ b/.gitignore
@@ -4,4 +4,8 @@
 **/__pycache__

 rust/target
-rust/Cargo.lock
+rust/Cargo.lock
+
+site
+
+.pytest_cache
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,11 @@
+repos:
+-   repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v3.2.0
+    hooks:
+    -   id: check-yaml
+    -   id: end-of-file-fixer
+    -   id: trailing-whitespace
+-   repo: https://github.com/psf/black
+    rev: 22.12.0
+    hooks:
+    -   id: black
--- a/docs/docs/index.md
+++ b/docs/docs/index.md
@@ -17,3 +17,4 @@ LanceDB's core is written in Rust 🦀 and is built using Lance, an open-source
 ## Documentation Quick Links

 * `Quick start` - search and filter a hello world vector dataset with LanceDB using the Python SDK.
+* [`API Reference`](python.md) - detailed documentation for the LanceDB Python SDK.
--- a/docs/mkdocs.yml
+++ b/docs/mkdocs.yml
@@ -1,3 +0,0 @@
-site_name: LanceDB Documentation
-theme:
-  name: material
--- a/docs/python.md
+++ b/docs/python.md
@@ -0,0 +1,12 @@
+# LanceDB Python API Reference
+
+## Installation
+
+```shell
+pip install lancedb
+```
+
+::: lancedb
+::: lancedb.db
+::: lancedb.table
+::: lancedb.query
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -0,0 +1,3 @@
+mkdocs==1.4.2
+mkdocs-material==9.1.3
+mkdocstrings[python]==0.20.0
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -0,0 +1,12 @@
+site_name: LanceDB Documentation
+
+theme:
+  name: "material"
+
+plugins:
+- search
+- mkdocstrings
+
+nav:
+- Home: index.md
+- Python API: python.md
--- a/python/lancedb/db.py
+++ b/python/lancedb/db.py
@@ -53,8 +53,9 @@ class LanceDBConnection:
    def __getitem__(self, name: str) -> LanceTable:
        return self.open_table(name)

-    def create_table(self, name: str, data: DATA = None,
-                     schema: pa.Schema = None) -> LanceTable:
+    def create_table(
+        self, name: str, data: DATA = None, schema: pa.Schema = None
+    ) -> LanceTable:
        """Create a table in the database.

        Parameters
--- a/python/lancedb/query.py
+++ b/python/lancedb/query.py
@@ -76,17 +76,12 @@ class LanceQueryBuilder:
        return self

    def to_df(self) -> pd.DataFrame:
-        """Execute the query and return the results as a pandas DataFrame.
-        """
+        """Execute the query and return the results as a pandas DataFrame."""
        ds = self._table.to_lance()
        # TODO indexed search
        tbl = ds.to_table(
            columns=self._columns,
            filter=self._where,
-            nearest={
-                "column": VECTOR_COLUMN_NAME,
-                "q": self._query,
-                "k": self._limit
-            }
+            nearest={"column": VECTOR_COLUMN_NAME, "q": self._query, "k": self._limit},
        )
        return tbl.to_pandas()
--- a/python/lancedb/table.py
+++ b/python/lancedb/table.py
@@ -131,8 +131,9 @@ def _sanitize_schema(data: pa.Table, schema: pa.Schema = None) -> pa.Table:
            return data
        # cast the columns to the expected types
        data = data.combine_chunks()
-        return pa.Table.from_arrays([data[name] for name in schema.names],
-                                    schema=schema)
+        return pa.Table.from_arrays(
+            [data[name] for name in schema.names], schema=schema
+        )
    # just check the vector column
    return _sanitize_vector_column(data, vector_column_name=VECTOR_COLUMN_NAME)

--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -37,7 +37,13 @@ repository = "https://github.com/eto-ai/lancedb"

 [project.optional-dependencies]
 tests = [
-    "pytest",
+    "pytest"
+]
+dev = [
+    "ruff", "pre-commit", "black"
+]
+docs = [
+    "mkdocs", "mkdocs-material", "mkdocstrings[python]"
 ]

 [build-system]
--- a/python/tests/test_db.py
+++ b/python/tests/test_db.py
@@ -20,9 +20,13 @@ def test_basic(tmp_path):
    assert db.uri == str(tmp_path)
    assert db.table_names() == []

-    table = db.create_table("test",
-                            data=[{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
-                                  {"vector": [5.9, 26.5], "item": "bar", "price": 20.0}])
+    table = db.create_table(
+        "test",
+        data=[
+            {"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
+            {"vector": [5.9, 26.5], "item": "bar", "price": 20.0},
+        ],
+    )
    rs = table.search([100, 100]).limit(1).to_df()
    assert len(rs) == 1
    assert rs["item"].iloc[0] == "bar"
--- a/python/tests/test_query.py
+++ b/python/tests/test_query.py
@@ -21,7 +21,6 @@ import pytest


 class MockTable:
-
    def __init__(self, tmp_path):
        self.uri = tmp_path

@@ -31,16 +30,22 @@ class MockTable:

@pytest.fixture
 def table(tmp_path) -> MockTable:
-    df = pd.DataFrame({
-        "vector": [[1, 2], [3, 4]],
-        "id": [1, 2],
-        "str_field": ["a", "b"],
-        "float_field": [1.0, 2.0]
-    })
-    schema = pa.schema([pa.field("vector", pa.list_(pa.float32(), list_size=2)),
-                        pa.field("id", pa.int32()),
-                        pa.field("str_field", pa.string()),
-                        pa.field("float_field", pa.float64())])
+    df = pd.DataFrame(
+        {
+            "vector": [[1, 2], [3, 4]],
+            "id": [1, 2],
+            "str_field": ["a", "b"],
+            "float_field": [1.0, 2.0],
+        }
+    )
+    schema = pa.schema(
+        [
+            pa.field("vector", pa.list_(pa.float32(), list_size=2)),
+            pa.field("id", pa.int32()),
+            pa.field("str_field", pa.string()),
+            pa.field("float_field", pa.float64()),
+        ]
+    )
    lance.write_dataset(df, tmp_path, schema)
    return MockTable(tmp_path)

@@ -55,5 +60,3 @@ def test_query_builder_with_filter(table):
    df = LanceQueryBuilder(table, [0, 0]).where("id = 2").to_df()
    assert df["id"].values[0] == 2
    assert all(df["vector"].values[0] == [3, 4])
-
-
--- a/python/tests/test_table.py
+++ b/python/tests/test_table.py
@@ -21,7 +21,6 @@ from lancedb.table import LanceTable


 class MockDB:
-
    def __init__(self, uri: Path):
        self.uri = uri

@@ -33,9 +32,12 @@ def db(tmp_path) -> MockDB:

 def test_basic(db):
    ds = LanceTable.create(
-        db, "test",
-        data=[{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
-              {"vector": [5.9, 26.5], "item": "bar", "price": 20.0}]
+        db,
+        "test",
+        data=[
+            {"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
+            {"vector": [5.9, 26.5], "item": "bar", "price": 20.0},
+        ],
    ).to_lance()

    table = LanceTable(db, "test")
@@ -45,21 +47,35 @@ def test_basic(db):


 def test_add(db):
-    schema = pa.schema([pa.field("vector", pa.list_(pa.float32())),
-                        pa.field("item", pa.string()),
-                        pa.field("price", pa.float32())])
-    expected = pa.Table.from_arrays([
-        pa.array([[3.1, 4.1], [5.9, 26.5]]),
-        pa.array(["foo", "bar"]),
-        pa.array([10.0, 20.0])
-    ], schema=schema)
-    data = [[{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
-             {"vector": [5.9, 26.5], "item": "bar", "price": 20.0}]]
+    schema = pa.schema(
+        [
+            pa.field("vector", pa.list_(pa.float32())),
+            pa.field("item", pa.string()),
+            pa.field("price", pa.float32()),
+        ]
+    )
+    expected = pa.Table.from_arrays(
+        [
+            pa.array([[3.1, 4.1], [5.9, 26.5]]),
+            pa.array(["foo", "bar"]),
+            pa.array([10.0, 20.0]),
+        ],
+        schema=schema,
+    )
+    data = [
+        [
+            {"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
+            {"vector": [5.9, 26.5], "item": "bar", "price": 20.0},
+        ]
+    ]
    df = pd.DataFrame(data[0])
    data.append(df)
    data.append(pa.Table.from_pandas(df, schema=schema))

    for i, d in enumerate(data):
-        tbl = (LanceTable.create(db, f"test_{i}", data=d, schema=schema)
-               .to_lance().to_table())
+        tbl = (
+            LanceTable.create(db, f"test_{i}", data=d, schema=schema)
+            .to_lance()
+            .to_table()
+        )
        assert expected == tbl