From 377a564904a90f535bcb3ba11819495edb902e44 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Sat, 14 Oct 2023 23:07:43 +0200 Subject: [PATCH] docs: switch python examples to be row based (#554) --- docs/src/guides/tables.md | 23 +++++++-------- docs/src/notebooks/tables_guide.ipynb | 42 ++++++++++++--------------- docs/src/python/arrow.md | 1 - docs/src/python/duckdb.md | 10 +++---- python/lancedb/table.py | 15 ++++++---- 5 files changed, 41 insertions(+), 50 deletions(-) diff --git a/docs/src/guides/tables.md b/docs/src/guides/tables.md index c75b333f..d1428c5c 100644 --- a/docs/src/guides/tables.md +++ b/docs/src/guides/tables.md @@ -251,8 +251,9 @@ After a table has been created, you can always add more data to it using ### Adding Pandas DataFrame ```python - df = pd.DataFrame([{"vector": [1.3, 1.4], "item": "fizz", "price": 100.0}, - {"vector": [9.5, 56.2], "item": "buzz", "price": 200.0}]) + df = pd.DataFrame({ + "vector": [[1.3, 1.4], [9.5, 56.2]], "item": ["fizz", "buzz"], "price": [100.0, 200.0] + }) tbl.add(df) ``` @@ -261,17 +262,12 @@ After a table has been created, you can always add more data to it using ### Adding to table using Iterator ```python - import pandas as pd - def make_batches(): for i in range(5): - yield pd.DataFrame( - { - "vector": [[3.1, 4.1], [1, 1]], - "item": ["foo", "bar"], - "price": [10.0, 20.0], - }) - + yield [ + {"vector": [3.1, 4.1], "item": "foo", "price": 10.0}, + {"vector": [5.9, 26.5], "item": "bar", "price": 20.0} + ] tbl.add(make_batches()) ``` @@ -306,9 +302,10 @@ Use the `delete()` method on tables to delete rows from a table. To choose which ```python import lancedb - import pandas as pd - data = pd.DataFrame({"x": [1, 2, 3], "vector": [[1, 2], [3, 4], [5, 6]]}) + data = [{"x": 1, "vector": [1, 2]}, + {"x": 2, "vector": [3, 4]}, + {"x": 3, "vector": [5, 6]}] db = lancedb.connect("./.lancedb") table = db.create_table("my_table", data) table.to_pandas() diff --git a/docs/src/notebooks/tables_guide.ipynb b/docs/src/notebooks/tables_guide.ipynb index 6d8431ec..d7c46043 100644 --- a/docs/src/notebooks/tables_guide.ipynb +++ b/docs/src/notebooks/tables_guide.ipynb @@ -114,13 +114,10 @@ } ], "source": [ - "import pandas as pd\n", - "\n", - "data = pd.DataFrame({\n", - " \"vector\": [[1.1, 1.2], [0.2, 1.8]],\n", - " \"lat\": [45.5, 40.1],\n", - " \"long\": [-122.7, -74.1]\n", - "})\n", + "data = [\n", + " {\"vector\": [1.1, 1.2], \"lat\": 45.5, \"long\": -122.7},\n", + " {\"vector\": [0.2, 1.8], \"lat\": 40.1, \"long\": -74.1},\n", + "]\n", "\n", "db.create_table(\"table2\", data)\n", "\n", @@ -366,11 +363,11 @@ "def make_batches():\n", " for i in range(5):\n", " yield pd.DataFrame(\n", - " {\n", - " \"vector\": [[3.1, 4.1], [1, 1]],\n", - " \"item\": [\"foo\", \"bar\"],\n", - " \"price\": [10.0, 20.0],\n", - " })\n", + " {\n", + " \"vector\": [[3.1, 4.1], [1, 1]],\n", + " \"item\": [\"foo\", \"bar\"],\n", + " \"price\": [10.0, 20.0],\n", + " })\n", "\n", "tbl = db.create_table(\"table5\", make_batches(), schema=PydanticSchema)\n", "tbl.schema" @@ -572,9 +569,11 @@ "metadata": {}, "outputs": [], "source": [ - "df = pd.DataFrame([{\"vector\": [1.3, 1.4], \"item\": \"fizz\", \"price\": 100.0},\n", - " {\"vector\": [9.5, 56.2], \"item\": \"buzz\", \"price\": 200.0}])\n", - "tbl.add(df)" + "data = [\n", + " {\"vector\": [1.3, 1.4], \"item\": \"fizz\", \"price\": 100.0},\n", + " {\"vector\": [9.5, 56.2], \"item\": \"buzz\", \"price\": 200.0}\n", + "]\n", + "tbl.add(data)" ] }, { @@ -596,17 +595,12 @@ "metadata": {}, "outputs": [], "source": [ - "\n", - "import pandas as pd\n", - "\n", "def make_batches():\n", " for i in range(5):\n", - " yield pd.DataFrame(\n", - " {\n", - " \"vector\": [[3.1, 4.1], [1, 1]],\n", - " \"item\": [\"foo\", \"bar\"],\n", - " \"price\": [10.0, 20.0],\n", - " })\n", + " yield [\n", + " {\"vector\": [3.1, 4.1], \"item\": \"foo\", \"price\": 10.0},\n", + " {\"vector\": [1, 1], \"item\": \"bar\", \"price\": 20.0},\n", + " ]\n", "tbl.add(make_batches())" ] }, diff --git a/docs/src/python/arrow.md b/docs/src/python/arrow.md index b05018a0..6f189945 100644 --- a/docs/src/python/arrow.md +++ b/docs/src/python/arrow.md @@ -39,7 +39,6 @@ to lazily generate data: from typing import Iterable import pyarrow as pa -import lancedb def make_batches() -> Iterable[pa.RecordBatch]: for i in range(5): diff --git a/docs/src/python/duckdb.md b/docs/src/python/duckdb.md index 1b429a8b..1fec16bb 100644 --- a/docs/src/python/duckdb.md +++ b/docs/src/python/duckdb.md @@ -11,15 +11,13 @@ pip install duckdb lancedb We will re-use [the dataset created previously](./arrow.md): ```python -import pandas as pd import lancedb db = lancedb.connect("data/sample-lancedb") -data = pd.DataFrame({ - "vector": [[3.1, 4.1], [5.9, 26.5]], - "item": ["foo", "bar"], - "price": [10.0, 20.0] -}) +data = [ + {"vector": [3.1, 4.1], "item": "foo", "price": 10.0}, + {"vector": [5.9, 26.5], "item": "bar", "price": 20.0} +] table = db.create_table("pd_table", data=data) arrow_table = table.to_arrow() ``` diff --git a/python/lancedb/table.py b/python/lancedb/table.py index 6b1b3ffa..32677626 100644 --- a/python/lancedb/table.py +++ b/python/lancedb/table.py @@ -291,8 +291,9 @@ class Table(ABC): Examples -------- >>> import lancedb - >>> import pandas as pd - >>> data = pd.DataFrame({"x": [1, 2, 3], "vector": [[1, 2], [3, 4], [5, 6]]}) + >>> data = [ + ... {"x": 1, "vector": [1, 2]}, {"x": 2, "vector": [3, 4]}, {"x": 3, "vector": [5, 6]} + ... ] >>> db = lancedb.connect("./.lancedb") >>> table = db.create_table("my_table", data) >>> table.to_pandas() @@ -713,8 +714,9 @@ class LanceTable(Table): Examples -------- >>> import lancedb - >>> import pandas as pd - >>> data = pd.DataFrame({"x": [1, 2, 3], "vector": [[1, 2], [3, 4], [5, 6]]}) + >>> data = [ + ... {"x": 1, "vector": [1, 2]}, {"x": 2, "vector": [3, 4]}, {"x": 3, "vector": [5, 6]} + ... ] >>> db = lancedb.connect("./.lancedb") >>> table = db.create_table("my_table", data) >>> table.to_pandas() @@ -829,8 +831,9 @@ class LanceTable(Table): Examples -------- >>> import lancedb - >>> import pandas as pd - >>> data = pd.DataFrame({"x": [1, 2, 3], "vector": [[1, 2], [3, 4], [5, 6]]}) + >>> data = [ + ... {"x": 1, "vector": [1, 2]}, {"x": 2, "vector": [3, 4]}, {"x": 3, "vector": [5, 6]} + ... ] >>> db = lancedb.connect("./.lancedb") >>> table = db.create_table("my_table", data) >>> table.to_pandas()