mirror of
https://github.com/lancedb/lancedb.git
synced 2026-05-26 08:20:39 +00:00
arrow table/f16 example (#907)
This commit is contained in:
@@ -13,7 +13,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 50,
|
||||
"execution_count": 2,
|
||||
"id": "c1b4e34b-a49c-471d-a343-a5940bb5138a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -23,7 +23,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 3,
|
||||
"id": "4e5a8d07-d9a1-48c1-913a-8e0629289579",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -44,7 +44,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 4,
|
||||
"id": "5df12f66-8d99-43ad-8d0b-22189ec0a6b9",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -62,7 +62,7 @@
|
||||
"long: [[-122.7,-74.1]]"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -90,7 +90,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 5,
|
||||
"id": "f4d87ae9-0ccb-48eb-b31d-bb8f2370e47e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -108,7 +108,7 @@
|
||||
"long: [[-122.7,-74.1]]"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -135,10 +135,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 6,
|
||||
"id": "25f34bcf-fca0-4431-8601-eac95d1bd347",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[2024-01-31T18:59:33Z WARN lance::dataset] No existing dataset at /Users/qian/Work/LanceDB/lancedb/docs/src/notebooks/.lancedb/table3.lance, it will be created\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
@@ -148,7 +155,7 @@
|
||||
"long: float"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -171,45 +178,51 @@
|
||||
"id": "4df51925-7ca2-4005-9c72-38b3d26240c6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### From PyArrow Tables\n",
|
||||
"### From an Arrow Table\n",
|
||||
"\n",
|
||||
"You can also create LanceDB tables directly from pyarrow tables"
|
||||
"You can also create LanceDB tables directly from pyarrow tables. LanceDB supports float16 type."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": 7,
|
||||
"id": "90a880f6-be43-4c9d-ba65-0b05197c0f6f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"vector: fixed_size_list<item: float>[2]\n",
|
||||
" child 0, item: float\n",
|
||||
"item: string\n",
|
||||
"price: double"
|
||||
"vector: fixed_size_list<item: halffloat>[16]\n",
|
||||
" child 0, item: halffloat\n",
|
||||
"text: string"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"table = pa.Table.from_arrays(\n",
|
||||
" [\n",
|
||||
" pa.array([[3.1, 4.1], [5.9, 26.5]],\n",
|
||||
" pa.list_(pa.float32(), 2)),\n",
|
||||
" pa.array([\"foo\", \"bar\"]),\n",
|
||||
" pa.array([10.0, 20.0]),\n",
|
||||
" ],\n",
|
||||
" [\"vector\", \"item\", \"price\"],\n",
|
||||
" )\n",
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"db = lancedb.connect(\"db\")\n",
|
||||
"dim = 16\n",
|
||||
"total = 2\n",
|
||||
"schema = pa.schema(\n",
|
||||
" [\n",
|
||||
" pa.field(\"vector\", pa.list_(pa.float16(), dim)),\n",
|
||||
" pa.field(\"text\", pa.string())\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"data = pa.Table.from_arrays(\n",
|
||||
" [\n",
|
||||
" pa.array([np.random.randn(dim).astype(np.float16) for _ in range(total)],\n",
|
||||
" pa.list_(pa.float16(), dim)),\n",
|
||||
" pa.array([\"foo\", \"bar\"])\n",
|
||||
" ],\n",
|
||||
" [\"vector\", \"text\"],\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"tbl = db.create_table(\"test1\", table, mode=\"overwrite\")\n",
|
||||
"tbl = db.create_table(\"f16_tbl\", data, schema=schema)\n",
|
||||
"tbl.schema"
|
||||
]
|
||||
},
|
||||
@@ -225,7 +238,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"execution_count": 8,
|
||||
"id": "d81121d7-e4b7-447c-a48c-974b6ebb464a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -240,7 +253,7 @@
|
||||
"imdb_id: int64 not null"
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -282,7 +295,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"execution_count": 9,
|
||||
"id": "bc247142-4e3c-41a2-b94c-8e00d2c2a508",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -292,7 +305,7 @@
|
||||
"LanceTable(table4)"
|
||||
]
|
||||
},
|
||||
"execution_count": 14,
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -333,7 +346,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"execution_count": 10,
|
||||
"id": "25ad3523-e0c9-4c28-b3df-38189c4e0e5f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -346,7 +359,7 @@
|
||||
"price: double not null"
|
||||
]
|
||||
},
|
||||
"execution_count": 16,
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -385,7 +398,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"execution_count": 11,
|
||||
"id": "2814173a-eacc-4dd8-a64d-6312b44582cc",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -411,7 +424,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"execution_count": 12,
|
||||
"id": "df9e13c0-41f6-437f-9dfa-2fd71d3d9c45",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -421,7 +434,7 @@
|
||||
"['table6', 'table4', 'table5', 'movielens_small']"
|
||||
]
|
||||
},
|
||||
"execution_count": 18,
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -432,7 +445,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"execution_count": 13,
|
||||
"id": "9343f5ad-6024-42ee-ac2f-6c1471df8679",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -541,7 +554,7 @@
|
||||
"9 [5.9, 26.5] bar 20.0"
|
||||
]
|
||||
},
|
||||
"execution_count": 20,
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -564,7 +577,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"execution_count": 14,
|
||||
"id": "8a56250f-73a1-4c26-a6ad-5c7a0ce3a9ab",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -590,7 +603,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"execution_count": 15,
|
||||
"id": "030c7057-b98e-4e2f-be14-b8c1f927f83c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -621,7 +634,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"execution_count": 16,
|
||||
"id": "e7a17de2-08d2-41b7-bd05-f63d1045ab1f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -629,16 +642,16 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"32\n"
|
||||
"22\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"17"
|
||||
"12"
|
||||
]
|
||||
},
|
||||
"execution_count": 24,
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -661,7 +674,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"execution_count": 17,
|
||||
"id": "fe3310bd-08f4-4a22-a63b-b3127d22f9f7",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -681,25 +694,20 @@
|
||||
"8 [3.1, 4.1] foo 10.0\n",
|
||||
"9 [3.1, 4.1] foo 10.0\n",
|
||||
"10 [3.1, 4.1] foo 10.0\n",
|
||||
"11 [3.1, 4.1] foo 10.0\n",
|
||||
"12 [3.1, 4.1] foo 10.0\n",
|
||||
"13 [3.1, 4.1] foo 10.0\n",
|
||||
"14 [3.1, 4.1] foo 10.0\n",
|
||||
"15 [3.1, 4.1] foo 10.0\n",
|
||||
"16 [3.1, 4.1] foo 10.0\n"
|
||||
"11 [3.1, 4.1] foo 10.0\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"ename": "OSError",
|
||||
"evalue": "LanceError(IO): Error during planning: column foo does not exist",
|
||||
"evalue": "LanceError(IO): Error during planning: column foo does not exist, /Users/runner/work/lance/lance/rust/lance-core/src/error.rs:212:23",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[0;31mOSError\u001b[0m Traceback (most recent call last)",
|
||||
"Cell \u001b[0;32mIn[30], line 4\u001b[0m\n\u001b[1;32m 2\u001b[0m to_remove \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m, \u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mjoin(\u001b[38;5;28mstr\u001b[39m(v) \u001b[38;5;28;01mfor\u001b[39;00m v \u001b[38;5;129;01min\u001b[39;00m to_remove)\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28mprint\u001b[39m(tbl\u001b[38;5;241m.\u001b[39mto_pandas())\n\u001b[0;32m----> 4\u001b[0m \u001b[43mtbl\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdelete\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mitem IN (\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mto_remove\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m)\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 5\u001b[0m tbl\u001b[38;5;241m.\u001b[39mto_pandas()\n",
|
||||
"File \u001b[0;32m~/Documents/lancedb/lancedb/python/lancedb/table.py:610\u001b[0m, in \u001b[0;36mLanceTable.delete\u001b[0;34m(self, where)\u001b[0m\n\u001b[1;32m 609\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdelete\u001b[39m(\u001b[38;5;28mself\u001b[39m, where: \u001b[38;5;28mstr\u001b[39m):\n\u001b[0;32m--> 610\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_dataset\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdelete\u001b[49m\u001b[43m(\u001b[49m\u001b[43mwhere\u001b[49m\u001b[43m)\u001b[49m\n",
|
||||
"File \u001b[0;32m~/Documents/lancedb/lancedb/env/lib/python3.11/site-packages/lance/dataset.py:489\u001b[0m, in \u001b[0;36mLanceDataset.delete\u001b[0;34m(self, predicate)\u001b[0m\n\u001b[1;32m 487\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(predicate, pa\u001b[38;5;241m.\u001b[39mcompute\u001b[38;5;241m.\u001b[39mExpression):\n\u001b[1;32m 488\u001b[0m predicate \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mstr\u001b[39m(predicate)\n\u001b[0;32m--> 489\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_ds\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdelete\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpredicate\u001b[49m\u001b[43m)\u001b[49m\n",
|
||||
"\u001b[0;31mOSError\u001b[0m: LanceError(IO): Error during planning: column foo does not exist"
|
||||
"Cell \u001b[0;32mIn[17], line 4\u001b[0m\n\u001b[1;32m 2\u001b[0m to_remove \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m, \u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mjoin(\u001b[38;5;28mstr\u001b[39m(v) \u001b[38;5;28;01mfor\u001b[39;00m v \u001b[38;5;129;01min\u001b[39;00m to_remove)\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28mprint\u001b[39m(tbl\u001b[38;5;241m.\u001b[39mto_pandas())\n\u001b[0;32m----> 4\u001b[0m \u001b[43mtbl\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdelete\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mitem IN (\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mto_remove\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m)\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
|
||||
"File \u001b[0;32m~/Work/LanceDB/lancedb/docs/doc-venv/lib/python3.11/site-packages/lancedb/table.py:872\u001b[0m, in \u001b[0;36mLanceTable.delete\u001b[0;34m(self, where)\u001b[0m\n\u001b[1;32m 871\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdelete\u001b[39m(\u001b[38;5;28mself\u001b[39m, where: \u001b[38;5;28mstr\u001b[39m):\n\u001b[0;32m--> 872\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_dataset\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdelete\u001b[49m\u001b[43m(\u001b[49m\u001b[43mwhere\u001b[49m\u001b[43m)\u001b[49m\n",
|
||||
"File \u001b[0;32m~/Work/LanceDB/lancedb/docs/doc-venv/lib/python3.11/site-packages/lance/dataset.py:596\u001b[0m, in \u001b[0;36mLanceDataset.delete\u001b[0;34m(self, predicate)\u001b[0m\n\u001b[1;32m 594\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(predicate, pa\u001b[38;5;241m.\u001b[39mcompute\u001b[38;5;241m.\u001b[39mExpression):\n\u001b[1;32m 595\u001b[0m predicate \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mstr\u001b[39m(predicate)\n\u001b[0;32m--> 596\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_ds\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdelete\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpredicate\u001b[49m\u001b[43m)\u001b[49m\n",
|
||||
"\u001b[0;31mOSError\u001b[0m: LanceError(IO): Error during planning: column foo does not exist, /Users/runner/work/lance/lance/rust/lance-core/src/error.rs:212:23"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -712,7 +720,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 43,
|
||||
"execution_count": null,
|
||||
"id": "87d5bc21-847f-4c81-b56e-f6dbe5d05aac",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -729,7 +737,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 44,
|
||||
"execution_count": null,
|
||||
"id": "9cba4519-eb3a-4941-ab7e-873d762e750f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -742,7 +750,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 46,
|
||||
"execution_count": null,
|
||||
"id": "5bdc9801-d5ed-4871-92d0-88b27108e788",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -817,7 +825,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.4"
|
||||
"version": "3.11.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
Reference in New Issue
Block a user