Merge pull request #8 from lancedb/changhiskhan/mkdocs

hello world mkdocs
This commit is contained in:
Chang She
2023-03-22 18:45:54 -07:00
committed by GitHub
15 changed files with 168 additions and 48 deletions

54
.github/workflows/docs.yml vendored Normal file
View File

@@ -0,0 +1,54 @@
name: Deploy docs to Pages
on:
push:
branches: ["main"]
# Allows you to run this workflow manually from the Actions tab
workflow_dispatch:
# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
permissions:
contents: read
pages: write
id-token: write
# Allow one concurrent deployment
concurrency:
group: "pages"
cancel-in-progress: true
jobs:
# Single deploy job since we're just deploying
build:
environment:
name: github-pages
url: ${{ steps.deployment.outputs.page_url }}
runs-on: ubuntu-22.04
steps:
- name: Checkout
uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.10"
cache: "pip"
cache-dependency-path: "docs/requirements.txt"
- name: Build Python
working-directory: python
run: |
python -m pip install -e .
python -m pip install -r ../docs/requirements.txt
- name: Build docs
working-directory: docs
run: |
mkdocs build
- name: Setup Pages
uses: actions/configure-pages@v2
- name: Upload artifact
uses: actions/upload-pages-artifact@v1
with:
path: "site"
- name: Deploy to GitHub Pages
id: deployment
uses: actions/deploy-pages@v1

6
.gitignore vendored
View File

@@ -4,4 +4,8 @@
**/__pycache__
rust/target
rust/Cargo.lock
rust/Cargo.lock
site
.pytest_cache

11
.pre-commit-config.yaml Normal file
View File

@@ -0,0 +1,11 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v3.2.0
hooks:
- id: check-yaml
- id: end-of-file-fixer
- id: trailing-whitespace
- repo: https://github.com/psf/black
rev: 22.12.0
hooks:
- id: black

View File

@@ -17,3 +17,4 @@ LanceDB's core is written in Rust 🦀 and is built using Lance, an open-source
## Documentation Quick Links
* `Quick start` - search and filter a hello world vector dataset with LanceDB using the Python SDK.
* [`API Reference`](python.md) - detailed documentation for the LanceDB Python SDK.

View File

@@ -1,3 +0,0 @@
site_name: LanceDB Documentation
theme:
name: material

12
docs/python.md Normal file
View File

@@ -0,0 +1,12 @@
# LanceDB Python API Reference
## Installation
```shell
pip install lancedb
```
::: lancedb
::: lancedb.db
::: lancedb.table
::: lancedb.query

3
docs/requirements.txt Normal file
View File

@@ -0,0 +1,3 @@
mkdocs==1.4.2
mkdocs-material==9.1.3
mkdocstrings[python]==0.20.0

12
mkdocs.yml Normal file
View File

@@ -0,0 +1,12 @@
site_name: LanceDB Documentation
theme:
name: "material"
plugins:
- search
- mkdocstrings
nav:
- Home: index.md
- Python API: python.md

View File

@@ -53,8 +53,9 @@ class LanceDBConnection:
def __getitem__(self, name: str) -> LanceTable:
return self.open_table(name)
def create_table(self, name: str, data: DATA = None,
schema: pa.Schema = None) -> LanceTable:
def create_table(
self, name: str, data: DATA = None, schema: pa.Schema = None
) -> LanceTable:
"""Create a table in the database.
Parameters

View File

@@ -76,17 +76,12 @@ class LanceQueryBuilder:
return self
def to_df(self) -> pd.DataFrame:
"""Execute the query and return the results as a pandas DataFrame.
"""
"""Execute the query and return the results as a pandas DataFrame."""
ds = self._table.to_lance()
# TODO indexed search
tbl = ds.to_table(
columns=self._columns,
filter=self._where,
nearest={
"column": VECTOR_COLUMN_NAME,
"q": self._query,
"k": self._limit
}
nearest={"column": VECTOR_COLUMN_NAME, "q": self._query, "k": self._limit},
)
return tbl.to_pandas()

View File

@@ -131,8 +131,9 @@ def _sanitize_schema(data: pa.Table, schema: pa.Schema = None) -> pa.Table:
return data
# cast the columns to the expected types
data = data.combine_chunks()
return pa.Table.from_arrays([data[name] for name in schema.names],
schema=schema)
return pa.Table.from_arrays(
[data[name] for name in schema.names], schema=schema
)
# just check the vector column
return _sanitize_vector_column(data, vector_column_name=VECTOR_COLUMN_NAME)

View File

@@ -37,7 +37,13 @@ repository = "https://github.com/eto-ai/lancedb"
[project.optional-dependencies]
tests = [
"pytest",
"pytest"
]
dev = [
"ruff", "pre-commit", "black"
]
docs = [
"mkdocs", "mkdocs-material", "mkdocstrings[python]"
]
[build-system]

View File

@@ -20,9 +20,13 @@ def test_basic(tmp_path):
assert db.uri == str(tmp_path)
assert db.table_names() == []
table = db.create_table("test",
data=[{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
{"vector": [5.9, 26.5], "item": "bar", "price": 20.0}])
table = db.create_table(
"test",
data=[
{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
{"vector": [5.9, 26.5], "item": "bar", "price": 20.0},
],
)
rs = table.search([100, 100]).limit(1).to_df()
assert len(rs) == 1
assert rs["item"].iloc[0] == "bar"

View File

@@ -21,7 +21,6 @@ import pytest
class MockTable:
def __init__(self, tmp_path):
self.uri = tmp_path
@@ -31,16 +30,22 @@ class MockTable:
@pytest.fixture
def table(tmp_path) -> MockTable:
df = pd.DataFrame({
"vector": [[1, 2], [3, 4]],
"id": [1, 2],
"str_field": ["a", "b"],
"float_field": [1.0, 2.0]
})
schema = pa.schema([pa.field("vector", pa.list_(pa.float32(), list_size=2)),
pa.field("id", pa.int32()),
pa.field("str_field", pa.string()),
pa.field("float_field", pa.float64())])
df = pd.DataFrame(
{
"vector": [[1, 2], [3, 4]],
"id": [1, 2],
"str_field": ["a", "b"],
"float_field": [1.0, 2.0],
}
)
schema = pa.schema(
[
pa.field("vector", pa.list_(pa.float32(), list_size=2)),
pa.field("id", pa.int32()),
pa.field("str_field", pa.string()),
pa.field("float_field", pa.float64()),
]
)
lance.write_dataset(df, tmp_path, schema)
return MockTable(tmp_path)
@@ -55,5 +60,3 @@ def test_query_builder_with_filter(table):
df = LanceQueryBuilder(table, [0, 0]).where("id = 2").to_df()
assert df["id"].values[0] == 2
assert all(df["vector"].values[0] == [3, 4])

View File

@@ -21,7 +21,6 @@ from lancedb.table import LanceTable
class MockDB:
def __init__(self, uri: Path):
self.uri = uri
@@ -33,9 +32,12 @@ def db(tmp_path) -> MockDB:
def test_basic(db):
ds = LanceTable.create(
db, "test",
data=[{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
{"vector": [5.9, 26.5], "item": "bar", "price": 20.0}]
db,
"test",
data=[
{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
{"vector": [5.9, 26.5], "item": "bar", "price": 20.0},
],
).to_lance()
table = LanceTable(db, "test")
@@ -45,21 +47,35 @@ def test_basic(db):
def test_add(db):
schema = pa.schema([pa.field("vector", pa.list_(pa.float32())),
pa.field("item", pa.string()),
pa.field("price", pa.float32())])
expected = pa.Table.from_arrays([
pa.array([[3.1, 4.1], [5.9, 26.5]]),
pa.array(["foo", "bar"]),
pa.array([10.0, 20.0])
], schema=schema)
data = [[{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
{"vector": [5.9, 26.5], "item": "bar", "price": 20.0}]]
schema = pa.schema(
[
pa.field("vector", pa.list_(pa.float32())),
pa.field("item", pa.string()),
pa.field("price", pa.float32()),
]
)
expected = pa.Table.from_arrays(
[
pa.array([[3.1, 4.1], [5.9, 26.5]]),
pa.array(["foo", "bar"]),
pa.array([10.0, 20.0]),
],
schema=schema,
)
data = [
[
{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
{"vector": [5.9, 26.5], "item": "bar", "price": 20.0},
]
]
df = pd.DataFrame(data[0])
data.append(df)
data.append(pa.Table.from_pandas(df, schema=schema))
for i, d in enumerate(data):
tbl = (LanceTable.create(db, f"test_{i}", data=d, schema=schema)
.to_lance().to_table())
tbl = (
LanceTable.create(db, f"test_{i}", data=d, schema=schema)
.to_lance()
.to_table()
)
assert expected == tbl