mirror of
https://github.com/lancedb/lancedb.git
synced 2026-01-04 02:42:57 +00:00
Compare commits
5 Commits
python-v0.
...
ayush/rera
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d8f43ae0d3 | ||
|
|
d564f6eacb | ||
|
|
ed5d1fb557 | ||
|
|
85046a1156 | ||
|
|
b67689e1be |
@@ -1,5 +1,5 @@
|
|||||||
[tool.bumpversion]
|
[tool.bumpversion]
|
||||||
current_version = "0.7.0"
|
current_version = "0.7.1"
|
||||||
parse = """(?x)
|
parse = """(?x)
|
||||||
(?P<major>0|[1-9]\\d*)\\.
|
(?P<major>0|[1-9]\\d*)\\.
|
||||||
(?P<minor>0|[1-9]\\d*)\\.
|
(?P<minor>0|[1-9]\\d*)\\.
|
||||||
|
|||||||
29
.github/workflows/npm-publish.yml
vendored
29
.github/workflows/npm-publish.yml
vendored
@@ -7,6 +7,7 @@ on:
|
|||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
node:
|
node:
|
||||||
|
name: vectordb Typescript
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
# Only runs on tags that matches the make-release action
|
# Only runs on tags that matches the make-release action
|
||||||
if: startsWith(github.ref, 'refs/tags/v')
|
if: startsWith(github.ref, 'refs/tags/v')
|
||||||
@@ -39,6 +40,7 @@ jobs:
|
|||||||
node/vectordb-*.tgz
|
node/vectordb-*.tgz
|
||||||
|
|
||||||
node-macos:
|
node-macos:
|
||||||
|
name: vectordb ${{ matrix.config.arch }}
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
config:
|
config:
|
||||||
@@ -69,6 +71,7 @@ jobs:
|
|||||||
node/dist/lancedb-vectordb-darwin*.tgz
|
node/dist/lancedb-vectordb-darwin*.tgz
|
||||||
|
|
||||||
nodejs-macos:
|
nodejs-macos:
|
||||||
|
name: lancedb ${{ matrix.config.arch }}
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
config:
|
config:
|
||||||
@@ -99,7 +102,7 @@ jobs:
|
|||||||
nodejs/dist/*.node
|
nodejs/dist/*.node
|
||||||
|
|
||||||
node-linux:
|
node-linux:
|
||||||
name: node-linux (${{ matrix.config.arch}}-unknown-linux-gnu
|
name: vectordb (${{ matrix.config.arch}}-unknown-linux-gnu)
|
||||||
runs-on: ${{ matrix.config.runner }}
|
runs-on: ${{ matrix.config.runner }}
|
||||||
# Only runs on tags that matches the make-release action
|
# Only runs on tags that matches the make-release action
|
||||||
if: startsWith(github.ref, 'refs/tags/v')
|
if: startsWith(github.ref, 'refs/tags/v')
|
||||||
@@ -139,7 +142,7 @@ jobs:
|
|||||||
node/dist/lancedb-vectordb-linux*.tgz
|
node/dist/lancedb-vectordb-linux*.tgz
|
||||||
|
|
||||||
nodejs-linux:
|
nodejs-linux:
|
||||||
name: nodejs-linux (${{ matrix.config.arch}}-unknown-linux-gnu
|
name: lancedb (${{ matrix.config.arch}}-unknown-linux-gnu)
|
||||||
runs-on: ${{ matrix.config.runner }}
|
runs-on: ${{ matrix.config.runner }}
|
||||||
# Only runs on tags that matches the make-release action
|
# Only runs on tags that matches the make-release action
|
||||||
if: startsWith(github.ref, 'refs/tags/v')
|
if: startsWith(github.ref, 'refs/tags/v')
|
||||||
@@ -190,6 +193,7 @@ jobs:
|
|||||||
!nodejs/dist/*.node
|
!nodejs/dist/*.node
|
||||||
|
|
||||||
node-windows:
|
node-windows:
|
||||||
|
name: vectordb ${{ matrix.target }}
|
||||||
runs-on: windows-2022
|
runs-on: windows-2022
|
||||||
# Only runs on tags that matches the make-release action
|
# Only runs on tags that matches the make-release action
|
||||||
if: startsWith(github.ref, 'refs/tags/v')
|
if: startsWith(github.ref, 'refs/tags/v')
|
||||||
@@ -223,6 +227,7 @@ jobs:
|
|||||||
node/dist/lancedb-vectordb-win32*.tgz
|
node/dist/lancedb-vectordb-win32*.tgz
|
||||||
|
|
||||||
nodejs-windows:
|
nodejs-windows:
|
||||||
|
name: lancedb ${{ matrix.target }}
|
||||||
runs-on: windows-2022
|
runs-on: windows-2022
|
||||||
# Only runs on tags that matches the make-release action
|
# Only runs on tags that matches the make-release action
|
||||||
if: startsWith(github.ref, 'refs/tags/v')
|
if: startsWith(github.ref, 'refs/tags/v')
|
||||||
@@ -256,6 +261,7 @@ jobs:
|
|||||||
nodejs/dist/*.node
|
nodejs/dist/*.node
|
||||||
|
|
||||||
release:
|
release:
|
||||||
|
name: vectordb NPM Publish
|
||||||
needs: [node, node-macos, node-linux, node-windows]
|
needs: [node, node-macos, node-linux, node-windows]
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
# Only runs on tags that matches the make-release action
|
# Only runs on tags that matches the make-release action
|
||||||
@@ -284,8 +290,18 @@ jobs:
|
|||||||
for filename in *.tgz; do
|
for filename in *.tgz; do
|
||||||
npm publish $PUBLISH_ARGS $filename
|
npm publish $PUBLISH_ARGS $filename
|
||||||
done
|
done
|
||||||
|
- name: Notify Slack Action
|
||||||
|
uses: ravsamhq/notify-slack-action@2.3.0
|
||||||
|
if: ${{ always() }}
|
||||||
|
with:
|
||||||
|
status: ${{ job.status }}
|
||||||
|
notify_when: "failure"
|
||||||
|
notification_title: "{workflow} is failing"
|
||||||
|
env:
|
||||||
|
SLACK_WEBHOOK_URL: ${{ secrets.ACTION_MONITORING_SLACK }}
|
||||||
|
|
||||||
release-nodejs:
|
release-nodejs:
|
||||||
|
name: lancedb NPM Publish
|
||||||
needs: [nodejs-macos, nodejs-linux, nodejs-windows]
|
needs: [nodejs-macos, nodejs-linux, nodejs-windows]
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
# Only runs on tags that matches the make-release action
|
# Only runs on tags that matches the make-release action
|
||||||
@@ -333,6 +349,15 @@ jobs:
|
|||||||
else
|
else
|
||||||
npm publish --access public
|
npm publish --access public
|
||||||
fi
|
fi
|
||||||
|
- name: Notify Slack Action
|
||||||
|
uses: ravsamhq/notify-slack-action@2.3.0
|
||||||
|
if: ${{ always() }}
|
||||||
|
with:
|
||||||
|
status: ${{ job.status }}
|
||||||
|
notify_when: "failure"
|
||||||
|
notification_title: "{workflow} is failing"
|
||||||
|
env:
|
||||||
|
SLACK_WEBHOOK_URL: ${{ secrets.ACTION_MONITORING_SLACK }}
|
||||||
|
|
||||||
update-package-lock:
|
update-package-lock:
|
||||||
needs: [release]
|
needs: [release]
|
||||||
|
|||||||
@@ -18,8 +18,8 @@ COPY install_protobuf.sh install_protobuf.sh
|
|||||||
RUN ./install_protobuf.sh ${ARCH}
|
RUN ./install_protobuf.sh ${ARCH}
|
||||||
|
|
||||||
ENV DOCKER_USER=${DOCKER_USER}
|
ENV DOCKER_USER=${DOCKER_USER}
|
||||||
# Create a group and user
|
# Create a group and user, but only if it doesn't exist
|
||||||
RUN echo ${ARCH} && adduser --user-group --create-home --uid ${DOCKER_USER} build_user
|
RUN echo ${ARCH} && id -u ${DOCKER_USER} >/dev/null 2>&1 || adduser --user-group --create-home --uid ${DOCKER_USER} build_user
|
||||||
|
|
||||||
# We switch to the user to install Rust and Node, since those like to be
|
# We switch to the user to install Rust and Node, since those like to be
|
||||||
# installed at the user level.
|
# installed at the user level.
|
||||||
|
|||||||
4
node/package-lock.json
generated
4
node/package-lock.json
generated
@@ -1,12 +1,12 @@
|
|||||||
{
|
{
|
||||||
"name": "vectordb",
|
"name": "vectordb",
|
||||||
"version": "0.7.0",
|
"version": "0.7.1",
|
||||||
"lockfileVersion": 3,
|
"lockfileVersion": 3,
|
||||||
"requires": true,
|
"requires": true,
|
||||||
"packages": {
|
"packages": {
|
||||||
"": {
|
"": {
|
||||||
"name": "vectordb",
|
"name": "vectordb",
|
||||||
"version": "0.7.0",
|
"version": "0.7.1",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"x64",
|
"x64",
|
||||||
"arm64"
|
"arm64"
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "vectordb",
|
"name": "vectordb",
|
||||||
"version": "0.7.0",
|
"version": "0.7.1",
|
||||||
"description": " Serverless, low-latency vector database for AI applications",
|
"description": " Serverless, low-latency vector database for AI applications",
|
||||||
"main": "dist/index.js",
|
"main": "dist/index.js",
|
||||||
"types": "dist/index.d.ts",
|
"types": "dist/index.d.ts",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-darwin-arm64",
|
"name": "@lancedb/lancedb-darwin-arm64",
|
||||||
"version": "0.7.0",
|
"version": "0.7.1",
|
||||||
"os": ["darwin"],
|
"os": ["darwin"],
|
||||||
"cpu": ["arm64"],
|
"cpu": ["arm64"],
|
||||||
"main": "lancedb.darwin-arm64.node",
|
"main": "lancedb.darwin-arm64.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-darwin-x64",
|
"name": "@lancedb/lancedb-darwin-x64",
|
||||||
"version": "0.7.0",
|
"version": "0.7.1",
|
||||||
"os": ["darwin"],
|
"os": ["darwin"],
|
||||||
"cpu": ["x64"],
|
"cpu": ["x64"],
|
||||||
"main": "lancedb.darwin-x64.node",
|
"main": "lancedb.darwin-x64.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
||||||
"version": "0.7.0",
|
"version": "0.7.1",
|
||||||
"os": ["linux"],
|
"os": ["linux"],
|
||||||
"cpu": ["arm64"],
|
"cpu": ["arm64"],
|
||||||
"main": "lancedb.linux-arm64-gnu.node",
|
"main": "lancedb.linux-arm64-gnu.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-linux-x64-gnu",
|
"name": "@lancedb/lancedb-linux-x64-gnu",
|
||||||
"version": "0.7.0",
|
"version": "0.7.1",
|
||||||
"os": ["linux"],
|
"os": ["linux"],
|
||||||
"cpu": ["x64"],
|
"cpu": ["x64"],
|
||||||
"main": "lancedb.linux-x64-gnu.node",
|
"main": "lancedb.linux-x64-gnu.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-win32-x64-msvc",
|
"name": "@lancedb/lancedb-win32-x64-msvc",
|
||||||
"version": "0.7.0",
|
"version": "0.7.1",
|
||||||
"os": ["win32"],
|
"os": ["win32"],
|
||||||
"cpu": ["x64"],
|
"cpu": ["x64"],
|
||||||
"main": "lancedb.win32-x64-msvc.node",
|
"main": "lancedb.win32-x64-msvc.node",
|
||||||
|
|||||||
@@ -10,7 +10,7 @@
|
|||||||
"vector database",
|
"vector database",
|
||||||
"ann"
|
"ann"
|
||||||
],
|
],
|
||||||
"version": "0.7.0",
|
"version": "0.7.1",
|
||||||
"main": "dist/index.js",
|
"main": "dist/index.js",
|
||||||
"exports": {
|
"exports": {
|
||||||
".": "./dist/index.js",
|
".": "./dist/index.js",
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
from functools import cached_property
|
from functools import cached_property
|
||||||
|
from typing import Union
|
||||||
|
|
||||||
import pyarrow as pa
|
import pyarrow as pa
|
||||||
|
|
||||||
@@ -18,13 +19,16 @@ class ColbertReranker(Reranker):
|
|||||||
The name of the column to use as input to the cross encoder model.
|
The name of the column to use as input to the cross encoder model.
|
||||||
return_score : str, default "relevance"
|
return_score : str, default "relevance"
|
||||||
options are "relevance" or "all". Only "relevance" is supported for now.
|
options are "relevance" or "all". Only "relevance" is supported for now.
|
||||||
|
device : str or None, default None
|
||||||
|
The device to use for the model. If None, will use "cuda" if available, else "cpu".
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
model_name: str = "colbert-ir/colbertv2.0",
|
model_name: str = "colbert-ir/colbertv2.0",
|
||||||
column: str = "text",
|
column: str = "text",
|
||||||
return_score="relevance",
|
return_score: str="relevance",
|
||||||
|
device: Union[str, None] = None,
|
||||||
):
|
):
|
||||||
super().__init__(return_score)
|
super().__init__(return_score)
|
||||||
self.model_name = model_name
|
self.model_name = model_name
|
||||||
@@ -32,6 +36,10 @@ class ColbertReranker(Reranker):
|
|||||||
self.torch = attempt_import_or_raise(
|
self.torch = attempt_import_or_raise(
|
||||||
"torch"
|
"torch"
|
||||||
) # import here for faster ops later
|
) # import here for faster ops later
|
||||||
|
self.device = device
|
||||||
|
if device is None:
|
||||||
|
self.device = "cuda" if self.torch.cuda.is_available() else "cpu"
|
||||||
|
|
||||||
|
|
||||||
def _rerank(self, result_set: pa.Table, query: str):
|
def _rerank(self, result_set: pa.Table, query: str):
|
||||||
docs = result_set[self.column].to_pylist()
|
docs = result_set[self.column].to_pylist()
|
||||||
@@ -46,7 +54,7 @@ class ColbertReranker(Reranker):
|
|||||||
for document in docs:
|
for document in docs:
|
||||||
document_encoding = tokenizer(
|
document_encoding = tokenizer(
|
||||||
document, return_tensors="pt", truncation=True, max_length=512
|
document, return_tensors="pt", truncation=True, max_length=512
|
||||||
)
|
).to(self.device)
|
||||||
document_embedding = model(**document_encoding).last_hidden_state
|
document_embedding = model(**document_encoding).last_hidden_state
|
||||||
# Calculate MaxSim score
|
# Calculate MaxSim score
|
||||||
score = self.maxsim(query_embedding.unsqueeze(0), document_embedding)
|
score = self.maxsim(query_embedding.unsqueeze(0), document_embedding)
|
||||||
@@ -116,7 +124,7 @@ class ColbertReranker(Reranker):
|
|||||||
transformers = attempt_import_or_raise("transformers")
|
transformers = attempt_import_or_raise("transformers")
|
||||||
tokenizer = transformers.AutoTokenizer.from_pretrained(self.model_name)
|
tokenizer = transformers.AutoTokenizer.from_pretrained(self.model_name)
|
||||||
model = transformers.AutoModel.from_pretrained(self.model_name)
|
model = transformers.AutoModel.from_pretrained(self.model_name)
|
||||||
|
model.to(self.device)
|
||||||
return tokenizer, model
|
return tokenizer, model
|
||||||
|
|
||||||
def maxsim(self, query_embedding, document_embedding):
|
def maxsim(self, query_embedding, document_embedding):
|
||||||
|
|||||||
@@ -42,7 +42,7 @@ class CrossEncoderReranker(Reranker):
|
|||||||
@cached_property
|
@cached_property
|
||||||
def model(self):
|
def model(self):
|
||||||
sbert = attempt_import_or_raise("sentence_transformers")
|
sbert = attempt_import_or_raise("sentence_transformers")
|
||||||
cross_encoder = sbert.CrossEncoder(self.model_name)
|
cross_encoder = sbert.CrossEncoder(self.model_name).to(self.device)
|
||||||
|
|
||||||
return cross_encoder
|
return cross_encoder
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "lancedb-node"
|
name = "lancedb-node"
|
||||||
version = "0.7.0"
|
version = "0.7.1"
|
||||||
description = "Serverless, low-latency vector database for AI applications"
|
description = "Serverless, low-latency vector database for AI applications"
|
||||||
license.workspace = true
|
license.workspace = true
|
||||||
edition.workspace = true
|
edition.workspace = true
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "lancedb"
|
name = "lancedb"
|
||||||
version = "0.7.0"
|
version = "0.7.1"
|
||||||
edition.workspace = true
|
edition.workspace = true
|
||||||
description = "LanceDB: A serverless, low-latency vector database for AI applications"
|
description = "LanceDB: A serverless, low-latency vector database for AI applications"
|
||||||
license.workspace = true
|
license.workspace = true
|
||||||
|
|||||||
Reference in New Issue
Block a user