mirror of
https://github.com/lancedb/lancedb.git
synced 2026-01-13 23:32:57 +00:00
Qian/query option doc (#615)
- API documentation improvement for queries (table.search)
- A small bug fix for the remote API on create_table
This commit is contained in:
@@ -84,7 +84,9 @@ def contextualize(raw_df: "pd.DataFrame") -> Contextualizer:
|
||||
context windows that don't cross document boundaries. In this case, we can
|
||||
pass ``document_id`` as the group by.
|
||||
|
||||
>>> contextualize(data).window(4).stride(2).text_col('token').groupby('document_id').to_pandas()
|
||||
>>> (contextualize(data)
|
||||
... .window(4).stride(2).text_col('token').groupby('document_id')
|
||||
... .to_pandas())
|
||||
token document_id
|
||||
0 The quick brown fox 1
|
||||
2 brown fox jumped over 1
|
||||
@@ -92,18 +94,24 @@ def contextualize(raw_df: "pd.DataFrame") -> Contextualizer:
|
||||
6 the lazy dog 1
|
||||
9 I love sandwiches 2
|
||||
|
||||
``min_window_size`` determines the minimum size of the context windows that are generated.
|
||||
This can be used to trim the last few context windows which have size less than
|
||||
``min_window_size``. By default context windows of size 1 are skipped.
|
||||
``min_window_size`` determines the minimum size of the context windows
|
||||
that are generated. This can be used to trim the last few context windows
|
||||
which have size less than ``min_window_size``.
|
||||
By default context windows of size 1 are skipped.
|
||||
|
||||
>>> contextualize(data).window(6).stride(3).text_col('token').groupby('document_id').to_pandas()
|
||||
>>> (contextualize(data)
|
||||
... .window(6).stride(3).text_col('token').groupby('document_id')
|
||||
... .to_pandas())
|
||||
token document_id
|
||||
0 The quick brown fox jumped over 1
|
||||
3 fox jumped over the lazy dog 1
|
||||
6 the lazy dog 1
|
||||
9 I love sandwiches 2
|
||||
|
||||
>>> contextualize(data).window(6).stride(3).min_window_size(4).text_col('token').groupby('document_id').to_pandas()
|
||||
>>> (contextualize(data)
|
||||
... .window(6).stride(3).min_window_size(4).text_col('token')
|
||||
... .groupby('document_id')
|
||||
... .to_pandas())
|
||||
token document_id
|
||||
0 The quick brown fox jumped over 1
|
||||
3 fox jumped over the lazy dog 1
|
||||
@@ -113,7 +121,9 @@ def contextualize(raw_df: "pd.DataFrame") -> Contextualizer:
|
||||
|
||||
|
||||
class Contextualizer:
|
||||
"""Create context windows from a DataFrame. See [lancedb.context.contextualize][]."""
|
||||
"""Create context windows from a DataFrame.
|
||||
See [lancedb.context.contextualize][].
|
||||
"""
|
||||
|
||||
def __init__(self, raw_df):
|
||||
self._text_col = None
|
||||
@@ -183,7 +193,7 @@ class Contextualizer:
|
||||
deprecated_in="0.3.1",
|
||||
removed_in="0.4.0",
|
||||
current_version=__version__,
|
||||
details="Use the bar function instead",
|
||||
details="Use to_pandas() instead",
|
||||
)
|
||||
def to_df(self) -> "pd.DataFrame":
|
||||
return self.to_pandas()
|
||||
|
||||
Reference in New Issue
Block a user