Commit

add time series comparison
emptymalei committed Dec 1, 2024
1 parent fe3473f commit f905cb7
Showing 7 changed files with 606 additions and 124 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -236,3 +236,4 @@ po/*~
dl/notebooks/**/*.ipynb
!dl/notebooks/**/tree_darts.ipynb
dl/notebooks/lightning_logs/*
dl/notebooks/tmp/*
247 changes: 247 additions & 0 deletions dl/notebooks/timeseries-comparison.py
@@ -0,0 +1,247 @@
# ---
# jupyter:
# jupytext:
# text_representation:
# extension: .py
# format_name: light
# format_version: '1.5'
# jupytext_version: 1.15.2
# kernelspec:
# display_name: .venv
# language: python
# name: python3
# ---

# # Comparing Time Series with Each Other
#
# Time series data carries a time dimension, which makes it hard to see at a glance how similar two series are. In this notebook, we show a few ways to compare time series with each other.

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# +
from dtaidistance import dtw
from dtaidistance import dtw_visualisation as dtwvis

sns.set_theme()

plt.rcParams.update(
{
"font.size": 18, # General font size
"axes.titlesize": 20, # Title font size
"axes.labelsize": 16, # Axis label font size
"xtick.labelsize": 14, # X-axis tick label font size
"ytick.labelsize": 14, # Y-axis tick label font size
"legend.fontsize": 14, # Legend font size
"figure.titlesize": 20, # Figure title font size
}
)
# -

# # Dynamic Time Warping (DTW)
#
# To illustrate how DTW can be used to compare time series, we will build a sine wave and apply several transformations to it.
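
# Before building that dataset, here is a minimal sanity check of what `dtw.distance` measures
# compared with the plain Euclidean distance. The two toy sequences below trace the same shape,
# but the second one is stretched in time: DTW aligns the shapes and reports a much smaller
# distance, while the point-by-point Euclidean distance (after truncating to equal length)
# stays large. This is only an illustrative sketch; the numbers are not reused later.

# +
toy_a = np.array([0.0, 1.0, 2.0, 1.0, 0.0])
toy_b = np.array([0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0])

# DTW can compare sequences of different lengths directly
print("DTW distance:", dtw.distance(toy_a, toy_b))

# the Euclidean distance needs equal lengths, so we truncate the longer sequence
print("Euclidean distance:", np.linalg.norm(toy_a - toy_b[: len(toy_a)]))
# -

# Now we build the sine wave that serves as the reference series: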

t = np.arange(0, 20, 0.1)
ts_original = np.sin(t)

# We apply different transformations to the original time series.

ts_shifted = np.roll(ts_original, 10)
ts_jitter = ts_original + np.random.normal(0, 0.1, len(ts_original))
ts_flipped = ts_original[::-1]
ts_shortened = ts_original[::2]
ts_raise_level = ts_original + 0.5
ts_outlier = ts_original + np.append(np.zeros(len(ts_original) - 1), [10])

df = pd.DataFrame(
{
"t": t,
"original": ts_original,
"shifted": ts_shifted,
"jitter": ts_jitter,
"flipped": ts_flipped,
"shortened": np.pad(
ts_shortened, (0, len(ts_original) - len(ts_shortened)), constant_values=0
),
"raise_level": ts_raise_level,
"outlier": ts_outlier,
}
)

# +
_, ax = plt.subplots()

for s in df.columns[1:]:
sns.lineplot(df, x="t", y=s, ax=ax, label=s)


# +
distances = {
"series": df.columns[1:],
}

for s in df.columns[1:]:
distances["dtw"] = distances.get("dtw", []) + [dtw.distance(df.original, df[s])]
distances["euclidean"] = distances.get("euclidean", []) + [
np.linalg.norm(df.original - df[s])
]


_, ax = plt.subplots(figsize=(10, 6.18 * 2), nrows=2)

pd.DataFrame(distances).set_index("series").plot.bar(ax=ax[0])

colors = sns.color_palette("husl", len(distances["series"]))
pd.DataFrame(distances).plot.scatter(x="dtw", y="euclidean", ax=ax[1], c=colors, s=100)

for i, txt in enumerate(distances["series"]):
ax[1].annotate(txt, (distances["dtw"][i], distances["euclidean"][i]), fontsize=12)

ax[1].legend(distances["series"], loc="best")


# -
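
# To see how all the series relate to each other, not only to the original one, we can also
# compute the full pairwise DTW distance matrix. This is a small sketch reusing the same
# `dtw.distance` call as above; for large collections of series, `dtaidistance` also offers
# batch helpers such as `dtw.distance_matrix_fast`.

# +
pairwise_dtw = pd.DataFrame(
    [[dtw.distance(df[a], df[b]) for b in df.columns[1:]] for a in df.columns[1:]],
    index=df.columns[1:],
    columns=df.columns[1:],
)

sns.heatmap(pairwise_dtw, annot=True, fmt=".1f")
# -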


def dtw_map(s1, s2, window=None):
    """Plot the DTW warping paths between two series.

    :param s1: first time series
    :param s2: second time series
    :param window: maximal allowed warping shift; defaults to the full length of ``s1``
    """
    if window is None:
        window = len(s1)
    # psi=2 relaxes the matching at the first and last two points of each series
    d, paths = dtw.warping_paths(s1, s2, window=window, psi=2)
    best_path = dtw.best_path(paths)

    return dtwvis.plot_warpingpaths(s1, s2, paths, best_path)


dtw_map(df.original, df.jitter)

for s in df.columns[1:]:
fig, ax = dtw_map(df.original, df[s])
fig.suptitle(s, y=1.05)
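
# The `window` argument of `dtw_map` constrains how far the warping path may stray from the
# diagonal (a Sakoe-Chiba-style band). As a sketch, we redraw the warping map for the shifted
# series with a band of 20 steps, which is comfortably wider than the 10-sample shift applied above.

dtw_map(df.original, df.shifted, window=20)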


# # Dimension Reduction
#
# We embed each time series into a time-delay embedding space, then reduce the dimensionality of the embedded points so that we can visualize them.
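
# For example, with a window size of 3, the short series (1, 2, 3, 4, 5) turns into the
# three-dimensional points (1, 2, 3), (2, 3, 4) and (3, 4, 5). A one-line numpy sketch of
# this idea (for illustration only; the helper below builds the embedding we actually use):

np.lib.stride_tricks.sliding_window_view(np.arange(1, 6), window_shape=3)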


def time_delay_embed(df: pd.DataFrame, window_size: int) -> pd.DataFrame:
    """Embed time series into a time-delay embedding space.

    The time column ``t`` is required in the input data frame; every other
    column is embedded as its own series.

    :param df: original time series data frame
    :param window_size: window size for the time delay embedding
    :return: data frame with one row per (series, window) pair, the window values
        in columns ``0`` to ``window_size - 1``, the series name in ``name``,
        and the window start time in ``t``
    """
    dfs_embedded = []

    # every rolling window becomes one point in the embedding space
    for i in df.rolling(window_size):
        i_t = i.t.iloc[0]
        dfs_embedded.append(
            pd.DataFrame(i.reset_index(drop=True))
            .drop(columns=["t"])
            .T.reset_index()
            .rename(columns={"index": "name"})
            .assign(t=i_t)
        )

    # the first window_size - 1 windows are incomplete, so we drop them
    df_embedded = pd.concat(dfs_embedded[window_size - 1 :])

    return df_embedded


df_embedded_2 = time_delay_embed(df, window_size=2)

# +
_, ax = plt.subplots()

(
df_embedded_2.loc[df_embedded_2.name == "original"].plot.line(
x=0, y=1, ax=ax, legend=False
)
)
(
df_embedded_2.loc[df_embedded_2.name == "original"].plot.scatter(
x=0, y=1, c="t", colormap="viridis", ax=ax
)
)
# -

# We choose a larger window size to capture longer-range time dependencies and to give the dimension reduction methods enough dimensions to work with.

df_embedded = time_delay_embed(df, window_size=5)

# ## PCA

from sklearn.decomposition import PCA

pca = PCA(n_components=2)

df_embedded_pca = pd.concat(
[
pd.DataFrame(
pca.fit_transform(
df_embedded.loc[df_embedded.name == n].drop(columns=["name", "t"])
),
columns=["pca_0", "pca_1"],
).assign(name=n)
for n in df_embedded.name.unique()
]
)

sns.scatterplot(data=df_embedded_pca, x="pca_0", y="pca_1", hue="name")
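
# As a rough check of how much of the five-dimensional embedding the two principal components
# retain, we can fit PCA on a single series and inspect the explained variance ratio
# (a sketch; the exact numbers depend on the window size chosen above).

pca_check = PCA(n_components=2).fit(
    df_embedded.loc[df_embedded.name == "original"].drop(columns=["name", "t"])
)
pca_check.explained_variance_ratio_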

# +
_, ax = plt.subplots()

sns.scatterplot(
data=df_embedded_pca.loc[
df_embedded_pca.name.isin(
["original", "jitter", "flipped", "raise_level", "shifted", "shortened"]
)
],
x="pca_0",
y="pca_1",
hue="name",
style="name",
ax=ax,
)

ax.legend(loc="lower center", bbox_to_anchor=(0.5, -0.4), ncol=3)
# -


# ## t-SNE

from sklearn.manifold import TSNE

t_sne = TSNE(n_components=2, learning_rate="auto", init="random", perplexity=3)
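
# t-SNE is stochastic, so the embedding changes from run to run. For a reproducible figure one
# can pin the seed via `random_state` (a sketch; we keep the stochastic version above for the
# plots that follow). Note that scikit-learn requires `perplexity` to be smaller than the number
# of samples, which is easily satisfied here.

t_sne_reproducible = TSNE(
    n_components=2, learning_rate="auto", init="random", perplexity=3, random_state=42
)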

df_embedded.name.unique()

df_embedded_tsne = pd.concat(
[
pd.DataFrame(
t_sne.fit_transform(
df_embedded.loc[df_embedded.name == n].drop(columns=["name", "t"])
),
columns=["tsne_0", "tsne_1"],
).assign(name=n)
for n in df_embedded.name.unique()
]
)

df_embedded_tsne.loc[df_embedded_tsne.name == "original"]

sns.scatterplot(data=df_embedded_tsne, x="tsne_0", y="tsne_1", hue="name")

sns.scatterplot(
data=df_embedded_tsne.loc[
df_embedded_tsne.name.isin(["original", "jitter", "outlier"])
],
x="tsne_0",
y="tsne_1",
hue="name",
)
15 changes: 13 additions & 2 deletions dl/notebooks/timeseries_gan.py
@@ -489,7 +489,7 @@ def generation(self, num_samples, mean=0.0, std=1.0):
self.ori_data, self.ori_time, self.opt.batch_size
)
self.Z = random_generator(
num_samples, self.opt.z_dim, self.T, self.max_seq_len, mean, std
num_samples, self.opt.z_dim, self.T, self.max_seq_len # , mean, std
)
self.Z = torch.tensor(self.Z, dtype=torch.float32).to(self.device)
self.E_hat = self.netg(self.Z) # [?, 24, 24]
@@ -863,11 +863,22 @@ class ModelParams:
}
# -

data = sine_data_generation(3661, 24, 6)
data = sine_data_generation(3661, 24, 1)

# +

model = TimeGAN(ModelParams(**model_params), data)
# -

model.train()

model.generated_data.shape

model.generated_data

import matplotlib.pyplot as plt

data[0]

for i in range(len(data)):
plt.plot(data[i].shape, ".")
1 change: 1 addition & 0 deletions mkdocs.yml
@@ -260,6 +260,7 @@ nav:
- "Forecasting with Transformer": notebooks/transformer_timeseries_univariate.py
- "Forecasting with NeuralODE": notebooks/neuralode_timeseries.py
- "Generate Time Series Using Statistics": notebooks/time-series-data-generation.py
- "Comparing Time Series": notebooks/timeseries-comparison.py
- "Small Yet Powerful Concepts":
- concepts/index.md
- "Entropy": concepts/entropy.md
