feat: `interval` & None value for `prev` & `next` (#252)

* test: for builtin functions

* test: expect fail for `datetime()`

* feat: add `interval()` fn(WIP)

* feat: `interval()` fn in builtin(UNTEST)

* refactor: move `py_vec_obj_to_array` to util.rs

* style: fmt

* test: simple `interval()` cases

* test: `interval()` with `last()`&`first()`

* doc: `ts` param of `interval()`

* log: common_telemetry for logging in script crate

* doc: corresponding test fn for each .ron file

* feat: change to `mpsc` for schedule_job

* test: schedule_job

* dep: rm rustpython dep in common-function

* refactor: mv `schedule_job` into `Script` trait

* test: change to use `interval` to sample datapoint

* feat: add `gen_none_array` to generate a None Array

* feat: impl Missing value for `prev`&`next`

* test: `sum(prev(values))`

* doc: add comment for why not support Float16 in `prev()`

* feat: add `interval` in py side mock module

* style: cargo fmt

* refactor: according to comments

* refactor: extract `apply_interval_function`

* style: cargo fmt

* refactor: remove `schedule()`

* style: cargo fmt
This commit is contained in:
discord9
2022-09-14 10:48:27 +08:00
committed by GitHub
parent ec99eb0cd0
commit 20dcaa6897
18 changed files with 918 additions and 146 deletions

View File

@@ -44,13 +44,15 @@ def as_table(kline: list):
"rv_60d",
"rv_90d",
"rv_180d"
],
sql="select open_time, close from k_line")
])
def calc_rvs(open_time, close):
from greptime import vector, log, prev, sqrt, datetime, pow, sum
from greptime import vector, log, prev, sqrt, datetime, pow, sum, last
import greptime as g
def calc_rv(close, open_time, time, interval):
mask = (open_time < time) & (open_time > time - interval)
close = close[mask]
open_time = open_time[mask]
close = g.interval(open_time, close, datetime("10m"), lambda x:last(x))
avg_time_interval = (open_time[-1] - open_time[0])/(len(open_time)-1)
ref = log(close/prev(close))
@@ -60,10 +62,10 @@ def calc_rvs(open_time, close):
# how to get env var,
# maybe through accessing scope and serde then send to remote?
timepoint = open_time[-1]
rv_7d = calc_rv(close, open_time, timepoint, datetime("7d"))
rv_15d = calc_rv(close, open_time, timepoint, datetime("15d"))
rv_30d = calc_rv(close, open_time, timepoint, datetime("30d"))
rv_60d = calc_rv(close, open_time, timepoint, datetime("60d"))
rv_90d = calc_rv(close, open_time, timepoint, datetime("90d"))
rv_180d = calc_rv(close, open_time, timepoint, datetime("180d"))
rv_7d = vector([calc_rv(close, open_time, timepoint, datetime("7d"))])
rv_15d = vector([calc_rv(close, open_time, timepoint, datetime("15d"))])
rv_30d = vector([calc_rv(close, open_time, timepoint, datetime("30d"))])
rv_60d = vector([calc_rv(close, open_time, timepoint, datetime("60d"))])
rv_90d = vector([calc_rv(close, open_time, timepoint, datetime("90d"))])
rv_180d = vector([calc_rv(close, open_time, timepoint, datetime("180d"))])
return rv_7d, rv_15d, rv_30d, rv_60d, rv_90d, rv_180d

View File

@@ -7,7 +7,7 @@
{
"symbol": "BTCUSD",
"period": "1",
"open_time": 1581231300,
"open_time": 300,
"open": "10107",
"high": "10109.34",
"low": "10106.71",
@@ -16,7 +16,7 @@
{
"symbol": "BTCUSD",
"period": "1",
"open_time": 1581231360,
"open_time": 900,
"open": "10106.79",
"high": "10109.27",
"low": "10105.92",
@@ -25,7 +25,7 @@
{
"symbol": "BTCUSD",
"period": "1",
"open_time": 1581231420,
"open_time": 1200,
"open": "10106.09",
"high": "10108.75",
"low": "10104.66",
@@ -34,7 +34,7 @@
{
"symbol": "BTCUSD",
"period": "1",
"open_time": 1581231480,
"open_time": 1800,
"open": "10108.73",
"high": "10109.52",
"low": "10106.07",
@@ -43,7 +43,7 @@
{
"symbol": "BTCUSD",
"period": "1",
"open_time": 1581231540,
"open_time": 2400,
"open": "10106.38",
"high": "10109.48",
"low": "10104.81",
@@ -52,7 +52,7 @@
{
"symbol": "BTCUSD",
"period": "1",
"open_time": 1581231600,
"open_time": 3000,
"open": "10106.95",
"high": "10109.48",
"low": "10106.6",
@@ -61,7 +61,7 @@
{
"symbol": "BTCUSD",
"period": "1",
"open_time": 1581231660,
"open_time": 3600,
"open": "10107.55",
"high": "10109.28",
"low": "10104.68",
@@ -70,7 +70,7 @@
{
"symbol": "BTCUSD",
"period": "1",
"open_time": 1581231720,
"open_time": 4200,
"open": "10104.68",
"high": "10109.18",
"low": "10104.14",
@@ -79,7 +79,7 @@
{
"symbol": "BTCUSD",
"period": "1",
"open_time": 1581231780,
"open_time": 4800,
"open": "10108.8",
"high": "10117.36",
"low": "10108.8",
@@ -88,7 +88,7 @@
{
"symbol": "BTCUSD",
"period": "1",
"open_time": 1581231840,
"open_time": 5400,
"open": "10115.96",
"high": "10119.19",
"low": "10115.96",
@@ -97,7 +97,7 @@
{
"symbol": "BTCUSD",
"period": "1",
"open_time": 1581231900,
"open_time": 6000,
"open": "10117.08",
"high": "10120.73",
"low": "10116.96",

View File

@@ -1,4 +1,4 @@
from .greptime import coprocessor, copr
from .greptime import vector, log, prev, sqrt, pow, datetime, sum
from .greptime import vector, log, prev, next, first, last, sqrt, pow, datetime, sum, interval
from .mock import mock_tester
from .cfg import set_conn_addr, get_conn_addr

View File

@@ -89,6 +89,11 @@ class vector(np.ndarray):
def filter(self, lst_bool):
    """Return the elements of this object selected by `lst_bool`.

    `lst_bool` is handed directly to `self[...]`, so the result is whatever
    the indexing operation of the receiver produces for that index.
    NOTE(review): the parameter name suggests a boolean mask is expected --
    confirm against callers.
    """
    return self[lst_bool]
def last(lst):
    """Return the final element of `lst`.

    Uses negative indexing (`lst[-1]`), so an empty sequence raises
    IndexError.
    """
    return lst[-1]
def first(lst):
    """Return the leading element of `lst`.

    Uses `lst[0]`, so an empty sequence raises IndexError.
    """
    return lst[0]
def prev(lst):
ret = np.zeros(len(lst))
@@ -96,35 +101,22 @@ def prev(lst):
ret[0] = nan
return ret
def next(lst):
    """Shift `lst` one position towards the front, padding the tail with NaN.

    Element `i` of the result is `lst[i + 1]`; the last element has no
    successor and is NaN. The result is a new float array with the same
    length as `lst`. An empty input yields an empty array instead of
    raising IndexError.

    Note: the name shadows the builtin `next` for any code that imports it.
    """
    # Start from an all-NaN array so the final slot (and the empty case)
    # needs no separate assignment.
    ret = np.full(len(lst), np.nan)
    ret[:-1] = lst[1:]
    return ret
def query(sql: str):
    """Placeholder for running `sql`; does nothing and returns None.

    The body is a bare `pass`, so the argument is ignored.
    """
    pass
def interval(arr: list, duration: int, fill, step: None | int = None, explicitOffset=False):
def interval(ts: vector, arr: vector, duration: int, func):
"""
Note that this is a mock function with the same functionality as the actual Python Coprocessor
`arr` is a vector of integral or temporal type.
`duration` is the length of sliding window
`step` being the length when sliding window take a step
`fill` indicate how to fill missing value:
- "prev": use previous
- "post": next
- "linear": linear interpolation, if not possible to interpolate certain types, fallback to prev
- "null": use null
- "none": do not interpolate
"""
if step is None:
step = duration
tot_len = int(np.ceil(len(arr) // step))
slices = np.zeros((tot_len, int(duration)))
for idx, start in enumerate(range(0, len(arr), step)):
slices[idx] = arr[start:(start + duration)]
return slices
start = np.min(ts)
end = np.max(ts)
masks = [(ts >= i) & (ts <= (i+duration)) for i in range(start, end, duration)]
lst_res = [func(arr[mask]) for mask in masks]
return lst_res
def factor(unit: str) -> int:

View File

@@ -4,7 +4,7 @@ it can only run on mock data and support by numpy
"""
from typing import Any
import numpy as np
from .greptime import i32,i64,f32,f64, vector, interval, query, prev, datetime, log, sum, sqrt, pow, nan, copr, coprocessor
from .greptime import i32,i64,f32,f64, vector, interval, prev, datetime, log, sum, sqrt, pow, nan, copr, coprocessor
import inspect
import functools

View File

@@ -26,6 +26,16 @@ def get_db(req:str):
return requests.get("http://{}{}".format(get_conn_addr(), req))
if __name__ == "__main__":
with open("component/script/python/example/kline.json", "r") as kline_file:
kline = json.load(kline_file)
table = as_table(kline["result"])
close = table["close"]
open_time = table["open_time"]
env = {"close":close, "open_time": open_time}
res = mock_tester(calc_rvs, env=env)
print("Mock result:", [i[0] for i in res])
exit()
if len(sys.argv)!=2:
raise Exception("Expect only one address as cmd's args")
set_conn_addr(sys.argv[1])
@@ -42,11 +52,6 @@ if __name__ == "__main__":
open_time = table["open_time"]
init_table(close, open_time)
# print(repr(close), repr(open_time))
# print("calc_rv:", calc_rv(close, open_time, open_time[-1]+datetime("10m"), datetime("7d")))
env = {"close":close, "open_time": open_time}
# print("env:", env)
print("Mock result:", mock_tester(calc_rvs, env=env))
real = calc_rvs()
print(real)
try: