# ################################## HOW TO USE #################################### #
#                                                                                    #
# This is a Jupyter notebook formatted as a script                                   #
# Format: https://jupytext.readthedocs.io/en/latest/formats.html#the-percent-format  #
#                                                                                    #
# Save this file and remove the '.txt' extension                                     #
# In Jupyter Lab, right click on the Python file -> Open With -> Jupytext Notebook   #
# Make sure to have Jupytext installed: https://github.com/mwouts/jupytext           #
#                                                                                    #
# ################################################################################## #

# %% [markdown]
# #  Cross-validation

# %%
from vectorbtpro import *

# Pull BTCUSDT from Binance with `end` set to 2022-11-01 UTC. The start date
# and bar frequency are not specified here, so the library defaults apply
# (NOTE(review): confirm those defaults in the vectorbtpro docs).
data = vbt.BinanceData.pull("BTCUSDT", end="2022-11-01 UTC")
# Trailing expression: shows the index of the pulled data as the cell output.
data.index

# %%
@vbt.parameterized(merge_func="concat")
def sma_crossover_perf(data, fast_window, slow_window):
    """Return the Sharpe ratio of an SMA crossover strategy run on ``data``.

    Two "sma" indicators are run on ``data``, one per window. Entry signals are
    taken where the fast one crosses above the slow one, exit signals where it
    crosses below, and both are fed to ``vbt.Portfolio.from_signals`` with
    ``direction="both"``.

    Args:
        data: Data object exposing ``run`` and accepted by
            ``vbt.Portfolio.from_signals`` (the cells below pass slices of
            the pulled Binance data).
        fast_window: Window passed to the "sma" run labelled "fast_sma".
        slow_window: Window passed to the "sma" run labelled "slow_sma".

    Returns:
        The ``sharpe_ratio`` attribute of the simulated portfolio.
    """
    fast_line = data.run("sma", fast_window, short_name="fast_sma")
    slow_line = data.run("sma", slow_window, short_name="slow_sma")

    crossed_up = fast_line.real_crossed_above(slow_line)
    crossed_down = fast_line.real_crossed_below(slow_line)

    portfolio = vbt.Portfolio.from_signals(
        data,
        crossed_up,
        crossed_down,
        direction="both",
    )
    return portfolio.sharpe_ratio

# %%
# Evaluate the strategy on 2020 data only, over windows 5..49 for both
# parameters. The condition string on the first parameter presumably keeps
# only combinations where the fast window is below slow_window - confirm
# against the vbt.Param documentation.
perf = sma_crossover_perf(
    data["2020":"2020"],
    vbt.Param(np.arange(5, 50), condition="x < slow_window"),
    vbt.Param(np.arange(5, 50)),
    _execute_kwargs={"clear_cache": 50, "collect_garbage": 50},
)
# Trailing expression: shows the merged results as the cell output.
perf

# %%
# Rank the 2020 results from highest to lowest value (display only; `perf`
# itself is not modified).
perf.sort_values(ascending=False)

# %%
# Label of the highest 2020 result; it unpacks into the two window values.
best_fast_window, best_slow_window = perf.idxmax()
# Re-run that single combination on 2021, data that was not part of the search.
sma_crossover_perf(data["2021":"2021"], best_fast_window, best_slow_window)

# %%
# Baseline for the same 2021 window: Sharpe ratio of the "from_holding" run
# (presumably a buy-and-hold portfolio - confirm in the vectorbtpro docs).
data["2021":"2021"].run("from_holding").sharpe_ratio

# %%
# Walk-forward setup: consecutive 180-day periods starting at the first bar.
start_index = data.index[0]
period = pd.Timedelta(days=180)
# Result containers. Bounds are keyed by period number; performance entries
# are keyed by (split number, period number). "is" = in-sample,
# "oos" = out-of-sample, "bl" = baseline ("from_holding" run).
all_is_bounds = {}
all_is_bl_perf = {}
all_is_perf = {}
all_oos_bounds = {}
all_oos_bl_perf = {}
all_oos_perf = {}
# Both counters start at 0 and advance together, so split k always uses
# period k as in-sample and period k + 1 as out-of-sample.
split_idx = 0
period_idx = 0

with vbt.ProgressBar() as pbar:
    # Keep going while one full in-sample period plus one full out-of-sample
    # period still end at or before the last timestamp.
    while start_index + 2 * period <= data.index[-1]:
        pbar.set_prefix(str(start_index))

        # In-sample window: the current period. One nanosecond is subtracted
        # from the end bound, presumably because the slice is end-inclusive
        # and the next period's first bar must be excluded - TODO confirm.
        is_start_index = start_index
        is_end_index = start_index + period - pd.Timedelta(nanoseconds=1)
        is_data = data[is_start_index : is_end_index]
        is_bl_perf = is_data.run("from_holding").sharpe_ratio
        # Same parameter grid as the single-year search above, evaluated on
        # the in-sample window only.
        is_perf = sma_crossover_perf(
            is_data,
            vbt.Param(np.arange(5, 50), condition="x < slow_window"),
            vbt.Param(np.arange(5, 50)),
            _execute_kwargs=dict(
                clear_cache=50,
                collect_garbage=50
            )
        )

        # Out-of-sample window: the period immediately after the in-sample one.
        oos_start_index = start_index + period
        oos_end_index = start_index + 2 * period - pd.Timedelta(nanoseconds=1)
        oos_data = data[oos_start_index : oos_end_index]
        oos_bl_perf = oos_data.run("from_holding").sharpe_ratio
        # Take the best in-sample combination and evaluate only that one
        # on the out-of-sample window.
        best_fw, best_sw = is_perf.idxmax()
        oos_perf = sma_crossover_perf(oos_data, best_fw, best_sw)
        # Wrap the single result in a one-element Series whose index entry is
        # taken from the in-sample index, so it carries the same index levels.
        oos_perf_index = is_perf.index[is_perf.index == (best_fw, best_sw)]
        oos_perf = pd.Series([oos_perf], index=oos_perf_index)

        # Record this split; out-of-sample entries are stored under the next
        # period number.
        all_is_bounds[period_idx] = (is_start_index, is_end_index)
        all_oos_bounds[period_idx + 1] = (oos_start_index, oos_end_index)
        all_is_bl_perf[(split_idx, period_idx)] = is_bl_perf
        all_oos_bl_perf[(split_idx, period_idx + 1)] = oos_bl_perf
        all_is_perf[(split_idx, period_idx)] = is_perf
        all_oos_perf[(split_idx, period_idx + 1)] = oos_perf
        # Slide forward by one period: this split's out-of-sample window
        # becomes the next split's in-sample window.
        start_index = start_index + period
        split_idx += 1
        period_idx += 1
        pbar.update()

# %%
# Turn the per-period (start, end) tuples into tables indexed by period number.
is_period_ranges = pd.DataFrame.from_dict(
    all_is_bounds, orient="index", columns=["start", "end"]
).rename_axis("period")
oos_period_ranges = pd.DataFrame.from_dict(
    all_oos_bounds, orient="index", columns=["start", "end"]
).rename_axis("period")

# The in-sample and out-of-sample tables describe overlapping periods, so
# stack them and keep each repeated (start, end) row only once.
period_ranges = pd.concat((is_period_ranges, oos_period_ranges)).drop_duplicates()
period_ranges

# %%
# Baseline results collected by the walk-forward loop, as Series indexed by
# (split, period).
is_bl_perf = pd.Series(all_is_bl_perf)
is_bl_perf.index.names = ["split", "period"]
oos_bl_perf = pd.Series(all_oos_bl_perf)
oos_bl_perf.index.names = ["split", "period"]

# Reduce both to the "period" level and stack them into one baseline series.
bl_perf = pd.concat((
    is_bl_perf.vbt.select_levels("period"),
    oos_bl_perf.vbt.select_levels("period")
))
# Most periods appear twice (once as in-sample, once as out-of-sample).
# Deduplicate on the period label rather than on the value: a value-based
# drop_duplicates() would also discard a *different* period that happens to
# have the same baseline value (e.g. two periods that are both NaN).
bl_perf = bl_perf[~bl_perf.index.duplicated(keep="first")]
bl_perf

# %%
# Stack the per-split in-sample results into one Series. The dict keys are
# (split, period) tuples, which become the two outermost index levels.
is_perf = pd.concat(all_is_perf, names=["split", "period"])
is_perf

# %%
# Same for the out-of-sample results (one entry per split).
oos_perf = pd.concat(all_oos_perf, names=["split", "period"])
oos_perf

# %%
# Select the in-sample rows of the combinations that were chosen as best.
# The "period" level differs between the two series (out-of-sample entries
# are stored under the next period), so it is dropped from both indexes
# before matching on the remaining levels.
is_best_mask = is_perf.index.vbt.drop_levels("period").isin(
    oos_perf.index.vbt.drop_levels("period"))
is_best_perf = is_perf[is_best_mask]
is_best_perf

# %%
# Side-by-side summary statistics of the five result series, one column each.
pd.concat(
    [
        results.describe()
        for results in (
            is_perf,
            is_best_perf,
            is_bl_perf,
            oos_perf,
            oos_bl_perf,
        )
    ],
    axis=1,
    keys=[
        "IS",
        "IS (Best)",
        "IS (Baseline)",
        "OOS (Test)",
        "OOS (Baseline)",
    ],
)

# %%
# Box plot of all in-sample results, grouped by period.
fig = is_perf.vbt.boxplot(
    by_level="period",
    trace_kwargs={
        "line": {"color": "lightskyblue"},
        "opacity": 0.4,
        "showlegend": False,
    },
    xaxis_title="Period",
    yaxis_title="Sharpe",
)

# Overlay: in-sample result of the combination picked as best in each split.
is_best_perf.vbt.select_levels("period").vbt.plot(
    trace_kwargs={
        "name": "Best",
        "line": {"color": "limegreen", "dash": "dash"},
    },
    fig=fig,
)

# Overlay: baseline result per period.
bl_perf.vbt.plot(
    trace_kwargs={
        "name": "Baseline",
        "line": {"color": "orange", "dash": "dash"},
    },
    fig=fig,
)

# Overlay: out-of-sample result of the picked combination.
oos_perf.vbt.select_levels("period").vbt.plot(
    trace_kwargs={
        "name": "Test",
        "line": {"color": "orangered"},
    },
    fig=fig,
)

fig.show()

# %%
# All in-sample results of split 6 (the split number is hard-coded), followed
# by their summary statistics as the cell output.
is_perf_split6 = is_perf.xs(6, level="split")
is_perf_split6.describe()

# %%
# Start and end timestamps of period 6, then a plot of the data in that window.
# NOTE(review): the "first_" prefix is misleading - these are the bounds of
# period 6, not of the first period.
first_left_bound = period_ranges.loc[6, "start"]
first_right_bound = period_ranges.loc[6, "end"]
data[first_left_bound : first_right_bound].plot().show()

# %%
# Out-of-sample result recorded for period 6. In the walk-forward loop the
# out-of-sample entry of a split is stored under the next period number, so
# this is the combination selected in the preceding split, tested on period 6.
oos_perf.xs(6, level="period")

# %%
# 25th percentile of the split-6 in-sample results, for comparison with the
# value above.
is_perf_split6.quantile(0.25)

# %%