mirror of
https://github.com/lancedb/lancedb.git
synced 2026-01-13 23:32:57 +00:00
address PR comments
This commit is contained in:
@@ -10,11 +10,15 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
from __future__ import annotations
|
||||
|
||||
import pandas as pd
|
||||
|
||||
|
||||
def contextualize(raw_df):
|
||||
def contextualize(raw_df: pd.DataFrame) -> Contextualizer:
|
||||
"""Create a Contextualizer object for the given DataFrame.
|
||||
Used to create context windows.
|
||||
"""
|
||||
return Contextualizer(raw_df)
|
||||
|
||||
|
||||
@@ -26,25 +30,56 @@ class Contextualizer:
|
||||
self._window = None
|
||||
self._raw_df = raw_df
|
||||
|
||||
def window(self, window):
|
||||
def window(self, window: int) -> Contextualizer:
|
||||
"""Set the window size. i.e., how many rows to include in each window.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
window: int
|
||||
The window size.
|
||||
"""
|
||||
self._window = window
|
||||
return self
|
||||
|
||||
def stride(self, stride):
|
||||
def stride(self, stride: int) -> Contextualizer:
|
||||
"""Set the stride. i.e., how many rows to skip between each window.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
stride: int
|
||||
The stride.
|
||||
"""
|
||||
self._stride = stride
|
||||
return self
|
||||
|
||||
def groupby(self, groupby):
|
||||
def groupby(self, groupby: str) -> Contextualizer:
|
||||
"""Set the groupby column. i.e., how to group the rows.
|
||||
Windows don't cross groups
|
||||
|
||||
Parameters
|
||||
----------
|
||||
groupby: str
|
||||
The groupby column.
|
||||
"""
|
||||
self._groupby = groupby
|
||||
return self
|
||||
|
||||
def text_col(self, text_col):
|
||||
def text_col(self, text_col: str) -> Contextualizer:
|
||||
"""Set the text column used to make the context window.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
text_col: str
|
||||
The text column.
|
||||
"""
|
||||
self._text_col = text_col
|
||||
return self
|
||||
|
||||
def to_df(self):
|
||||
def to_df(self) -> pd.DataFrame:
|
||||
"""Create the context windows and return a DataFrame."""
|
||||
|
||||
def process_group(grp):
|
||||
# For each video, create the text rolling window
|
||||
# For each group, create the text rolling window
|
||||
text = grp[self._text_col].values
|
||||
contexts = grp.iloc[: -self._window : self._stride, :].copy()
|
||||
contexts[self._text_col] = [
|
||||
|
||||
Reference in New Issue
Block a user