address PR comments

This commit is contained in:
Chang She
2023-03-23 17:31:24 -07:00
parent b91139d3c7
commit 826fe320bb
3 changed files with 58 additions and 7 deletions

View File

@@ -10,11 +10,15 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations
import pandas as pd
def contextualize(raw_df: pd.DataFrame) -> Contextualizer:
    """Create a Contextualizer object for the given DataFrame.

    Used to create context windows.

    Parameters
    ----------
    raw_df: pd.DataFrame
        The DataFrame handed to the Contextualizer constructor.

    Returns
    -------
    Contextualizer
        A new Contextualizer wrapping ``raw_df``.
    """
    return Contextualizer(raw_df)
@@ -26,25 +30,56 @@ class Contextualizer:
self._window = None
self._raw_df = raw_df
def window(self, window: int) -> Contextualizer:
    """Set the window size, i.e., how many rows to include in each window.

    Parameters
    ----------
    window: int
        The window size.

    Returns
    -------
    Contextualizer
        This same object, so that configuration calls can be chained.
    """
    self._window = window
    return self
def stride(self, stride: int) -> Contextualizer:
    """Set the stride, i.e., how many rows to skip between each window.

    Parameters
    ----------
    stride: int
        The stride.

    Returns
    -------
    Contextualizer
        This same object, so that configuration calls can be chained.
    """
    self._stride = stride
    return self
def groupby(self, groupby: str) -> Contextualizer:
    """Set the groupby column, i.e., how to group the rows.

    Windows don't cross groups.

    Parameters
    ----------
    groupby: str
        The groupby column.

    Returns
    -------
    Contextualizer
        This same object, so that configuration calls can be chained.
    """
    self._groupby = groupby
    return self
def text_col(self, text_col: str) -> Contextualizer:
    """Set the text column used to make the context window.

    Parameters
    ----------
    text_col: str
        The text column.

    Returns
    -------
    Contextualizer
        This same object, so that configuration calls can be chained.
    """
    self._text_col = text_col
    return self
def to_df(self):
def to_df(self) -> pd.DataFrame:
"""Create the context windows and return a DataFrame."""
def process_group(grp):
# For each video, create the text rolling window
# For each group, create the text rolling window
text = grp[self._text_col].values
contexts = grp.iloc[: -self._window : self._stride, :].copy()
contexts[self._text_col] = [