Commit e0e0131

update readme
1 parent 77e024d commit e0e0131

File tree

6 files changed (+631, -529 lines changed)


Pipfile

Lines changed: 1 addition & 3 deletions
@@ -13,11 +13,8 @@ numpy = "==1.26"
 pandas = "*"
 nbdev = "*"
 fastcore = "*"
-kaggle = ">=1.5"
 matplotlib = "*"
 seaborn = "*"
-scipy = "*"
-scikit-learn = "*"
 torch = "==2.0.0"
 python-dotenv = "*"
 pre-commit = "*"
@@ -26,6 +23,7 @@ hydra-core = "*"
 [dev-packages]
 ipykernel = "*"
 ts-vae-lstm = {editable = true, path = "."}
+nbconvert = "*"
 
 [requires]
 python_version = "3.9"

Pipfile.lock

Lines changed: 443 additions & 390 deletions
Some generated files are not rendered by default.

README.md

Lines changed: 23 additions & 55 deletions
@@ -1,69 +1,37 @@
-# TS VAE-LSTM
+## TS VAE-LSTM
 
+Implementation of the paper [Anomaly Detection for Time Series Using VAE-LSTM Hybrid Model](https://ieeexplore.ieee.org/document/9053558)
 
-> Implementation of the paper [Anomaly Detection for Time Series Using
-> VAE-LSTM Hybrid Model](https://ieeexplore.ieee.org/document/9053558)
 
-This is a work in progress.
+#### Usage
+> Hydra configurations to reproduce the results are provided in `config`.
 
-#### TODO
-
-- [ ] Separate training from notebooks
-- [ ] Fix github-actions
-- [ ] Page deployment
-- [ ] CI tests
-- [x] precommit
-- [x] Build complete AD pipeline
-- [x] include fine-grained threshold with quantile for within window
-  detection.
-- [x] use a squared term if the absolute element-wise error falls below
-  delta and a delta-scaled L1 term otherwise (Huber)
-- [x] Use dotenv `.env` to manage paths
-- [ ] Plot has a shift of 21 due to remainder -\> todo
-
-<!-- WARNING: THIS FILE WAS AUTOGENERATED! DO NOT EDIT! -->
-
-#### Installation
-
-``` sh
-pip install ts_vae_lstm
-```
+Clone the repository. After setting up the environment with `pipenv` (or another tool), update the hydra paths to the datasets and the hyperparameters for experimentation.
+Run the scripts in order: `train_vae.py` -> `train_lstm.py` to generate the prerequisite models.
+Then run the inference script `run_ad.py` to generate the plots and logs. All outputs are written to `models/`.
 
 #### Results from NYC Traffic dataset
 
-At time $t$, past $k$ window(s) of length $p=48$ are taken. The VAE-LSTM
-reconstructs the past windows and if the true time series deviates from
-the reconstructed time series, the $k^{th}$ window is marked as an
-“anomalous window”.
+At time $t$, the past $k$ window(s) of length $p=48$ are taken. The VAE-LSTM reconstructs the past windows, and if the true time series deviates from the reconstructed time series, the $k^{th}$ window is marked as an "anomalous window".
 
-VAE-LSTM is trained on a time series without anomalies so any deviation
-beyond the 90th quantile of reconstruction error (L2 norm) is considered
-an anomaly.
+VAE-LSTM is trained on a time series without anomalies, so any deviation beyond the 90th quantile of reconstruction error (L2 norm) is considered an anomaly.
 
-In the figure (`sample_data/result_granular.png`), blue lines represent
-the unseen data. Orange lines correspond to the reconstructed data. Red
-dashed lines are the true labels in the unseen set. Green window is the
-region where anomaly was predicted. Green line is the first time anomaly
-was flagged in the window.
+In the figure, blue lines represent the unseen data. Orange lines correspond to the reconstructed data. Red dashed lines are the true labels in the unseen set. The green window is the region where an anomaly was predicted. The green line is the first time an anomaly was flagged in the window.
 
-![](sample_data/result_granular.png)
+![](./models/ad_result_z24_lstm_1733682851.4265444.png)
 
 ## Misc
 
-### Env variables
 
-``` bash
-BASEDIR='<your-base-path>/ts_vae-lstm'
-MODELDIR=${BASEDIR}/models
-VAE_MODEL=${MODELDIR}/<best-vae-model>.pth
-LSTM_MODEL=${MODELDIR}/<best-lstm-model>.pth
-```
-
-### CUDA setup
-
-Download the driver and cuda version compiled for the driver.
-
-``` bash
-sudo mhwd -i pci video-nvidia-470xx
-sudo pacman -U https://archive.archlinux.org/packages/c/cuda/cuda-11.4.2-1-x86_64.pkg.tar.zst
-```
+#### TODO
+- [x] Training and inference scripts (#1)
+- [x] Separate training from notebooks
+- [ ] Fix github-actions
+- [ ] Page deployment
+- [ ] CI tests
+- [x] precommit
+- [x] Build complete AD pipeline
+- [x] include fine-grained threshold with quantile for within-window detection
+- [x] use a squared term if the absolute element-wise error falls below delta and a delta-scaled L1 term otherwise (Huber)
+- [x] Use dotenv `.env` to manage paths
+- [x] Plot has a shift of 21 due to remainder -> todo
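
For reference, the 90th-quantile rule described in the updated README amounts to only a few lines of NumPy. The sketch below is an illustration under assumptions, not the repository's implementation: the function name `flag_anomalous_windows`, the array shapes, and the toy data are all made up for the example.

```python
import numpy as np

def flag_anomalous_windows(train_errors, test_true, test_recon, q=0.90):
    """Flag windows whose L2 reconstruction error exceeds the q-quantile
    of errors observed on the anomaly-free training series.

    train_errors: shape (n_train_windows,), L2 errors on anomaly-free data
    test_true, test_recon: shape (n_test_windows, p), true vs. reconstructed windows
    Returns a boolean mask; True marks an "anomalous window".
    """
    threshold = np.quantile(train_errors, q)                      # e.g. 90th quantile
    test_errors = np.linalg.norm(test_true - test_recon, axis=1)  # per-window L2 norm
    return test_errors > threshold

# Toy usage with p=48-length windows (synthetic data, illustration only)
rng = np.random.default_rng(0)
train_errors = rng.random(200)
true_windows = rng.normal(size=(10, 48))
recon_windows = true_windows + rng.normal(scale=0.1, size=(10, 48))
print(flag_anomalous_windows(train_errors, true_windows, recon_windows))
```

Deriving the threshold from the anomaly-free training errors is what lets any later deviation beyond it be flagged, as the README describes.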

nbs/03_ad_complete.ipynb

Lines changed: 142 additions & 44 deletions
@@ -67,34 +67,7 @@
 "cell_type": "code",
 "execution_count": 5,
 "metadata": {},
-"outputs": [
-{
-"name": "stdout",
-"output_type": "stream",
-"text": [
-"t\n",
-"t_unit\n",
-"readings\n",
-"idx_anomaly\n",
-"idx_split\n",
-"training\n",
-"test\n",
-"train_m\n",
-"train_std\n",
-"t_train\n",
-"t_test\n",
-"idx_anomaly_test\n"
-]
-},
-{
-"name": "stderr",
-"output_type": "stream",
-"text": [
-"/home/gg/.local/share/virtualenvs/ts_vae-lstm-hz-Oy2CQ/lib/python3.9/site-packages/torch/cuda/__init__.py:107: UserWarning: CUDA initialization: CUDA unknown error - this may be due to an incorrectly set up environment, e.g. changing env variable CUDA_VISIBLE_DEVICES after program start. Setting the available devices to be zero. (Triggered internally at ../c10/cuda/CUDAFunctions.cpp:109.)\n",
-" return torch._C._cuda_getDeviceCount() > 0\n"
-]
-}
-],
+"outputs": [],
 "source": [
 "# | export\n",
 "from ts_vae_lstm.vae import VAE, Encoder, Decoder, StochasticSampler\n",
@@ -108,47 +81,172 @@
 "cell_type": "code",
 "execution_count": 6,
 "metadata": {},
+"outputs": [],
+"source": [
+"# for configs\n",
+"from hydra import compose, initialize\n",
+"from omegaconf import OmegaConf\n",
+"from fastcore.xtras import Path\n",
+"import os\n",
+"import glob"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 7,
+"metadata": {},
+"outputs": [],
+"source": [
+"# run only once\n",
+"try:\n",
+"    initialize(config_path=\"../config\", version_base=\"1.2\")\n",
+"    cfg = compose(config_name=\"config.yaml\")\n",
+"    cfg = OmegaConf.to_object(cfg) # perform interpolation of the variables also\n",
+"    cfg = OmegaConf.create(cfg) # so that dot-notation works?\n",
+"    cfg.base_dir = \"..\" # to make it work in the notebook\n",
+"except Exception as e:\n",
+"    print(f\"Got Exception while reading config:\\n{e}\")"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 8,
+"metadata": {},
 "outputs": [
 {
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"LSTM model: /run/media/data2/ts_vae-lstm/models/lstm_100_val0.81.pth\n",
-"VAE model: /run/media/data2/ts_vae-lstm/models/vae_100_z24.pth\n"
+"Number of workers: 4\n"
 ]
 }
 ],
 "source": [
-"load_dotenv()\n",
-"\n",
-"BASEDIR = os.getenv(\"BASEDIR\")\n",
-"MODELDIR = os.getenv(\"MODELDIR\")\n",
-"VAE_MODEL = os.getenv(\"VAE_MODEL\")\n",
-"LSTM_MODEL = os.getenv(\"LSTM_MODEL\")\n",
-"\n",
-"print(f\"LSTM model: {LSTM_MODEL}\\nVAE model: {VAE_MODEL}\")"
+"num_workers = cfg.num_workers if cfg.get(\"num_workers\", None) else os.cpu_count()\n",
+"print(f\"Number of workers: {num_workers}\")"
 ]
 },
 {
 "cell_type": "code",
-"execution_count": 7,
+"execution_count": 9,
 "metadata": {},
 "outputs": [
 {
 "data": {
 "text/plain": [
-"(4, 'cpu')"
+"'cuda'"
 ]
 },
-"execution_count": 7,
+"execution_count": 9,
 "metadata": {},
 "output_type": "execute_result"
 }
 ],
 "source": [
-"num_workers = os.cpu_count()\n",
-"device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
-"num_workers, device"
+"device = cfg.device if cfg.device else (\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
+"device"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 10,
+"metadata": {},
+"outputs": [
+{
+"data": {
+"text/plain": [
+"'..'"
+]
+},
+"execution_count": 10,
+"metadata": {},
+"output_type": "execute_result"
+}
+],
+"source": [
+"cfg.base_dir"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 11,
+"metadata": {},
+"outputs": [
+{
+"name": "stdout",
+"output_type": "stream",
+"text": [
+"Base directory: /run/media/data2/ts_vae-lstm\n",
+"Model directory: /run/media/data2/ts_vae-lstm/models\n",
+"Dataset is /run/media/data2/ts_vae-lstm/sample_data/nyc_taxi.npz\n",
+"VAE model: /run/media/data2/ts_vae-lstm/models/best_vae_*_z24_*.pth\n",
+"LSTM model: /run/media/data2/ts_vae-lstm/models/best_lstm_*_z24_*.pth\n"
+]
+}
+],
+"source": [
+"BASEDIR = Path(cfg.base_dir).resolve()\n",
+"MODELDIR = Path(\".\" + cfg.model_dir).resolve() # to move to project root\n",
+"DATAPATH = Path(\".\" + cfg.dataset.path).resolve() # to move to project root\n",
+"VAE_MODEL = Path(\".\" + cfg.vae_path).resolve() # to move to project root\n",
+"LSTM_MODEL = Path(\".\" + cfg.lstm_path).resolve()\n",
+"print(f\"Base directory: {BASEDIR}\")\n",
+"print(f\"Model directory: {MODELDIR}\")\n",
+"print(f\"Dataset is {DATAPATH}\")\n",
+"print(f\"VAE model: {VAE_MODEL}\")\n",
+"print(f\"LSTM model: {LSTM_MODEL}\")\n"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 12,
+"metadata": {},
+"outputs": [
+{
+"name": "stdout",
+"output_type": "stream",
+"text": [
+"/run/media/data2/ts_vae-lstm/models/best_vae_100_z24_1733051559.pth\n"
+]
+}
+],
+"source": [
+"if cfg.pattern:\n",
+"    paths = glob.glob(f\"{VAE_MODEL}\")\n",
+"    latest_path = paths[0]\n",
+"    latest_time = 0\n",
+"    for path in paths:\n",
+"        if os.path.getmtime(path) > latest_time:\n",
+"            latest_path = path\n",
+"            latest_time = os.path.getmtime(path)\n",
+"    VAE_MODEL = latest_path\n",
+"    print(VAE_MODEL)"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 13,
+"metadata": {},
+"outputs": [
+{
+"name": "stdout",
+"output_type": "stream",
+"text": [
+"/run/media/data2/ts_vae-lstm/models/best_lstm_100_z24_1733058653.pth\n"
+]
+}
+],
+"source": [
+"if cfg.pattern:\n",
+"    paths = glob.glob(f\"{LSTM_MODEL}\")\n",
+"    latest_path = paths[0]\n",
+"    latest_time = 0\n",
+"    for path in paths:\n",
+"        if os.path.getmtime(path) > latest_time:\n",
+"            latest_path = path\n",
+"            latest_time = os.path.getmtime(path)\n",
+"    LSTM_MODEL = latest_path\n",
+"    print(LSTM_MODEL)"
 ]
 },
 {
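
The new cells in `nbs/03_ad_complete.ipynb` resolve the `cfg.vae_path` / `cfg.lstm_path` glob patterns to the most recently modified checkpoint by looping over `os.path.getmtime`. Below is a minimal standalone sketch of the same selection; the helper name `latest_checkpoint` is hypothetical and not part of the repo.

```python
import glob
import os

def latest_checkpoint(pattern: str) -> str:
    """Return the most recently modified file matching a glob pattern.

    max(paths, key=os.path.getmtime) does the same job as the notebook's
    explicit loop over latest_path / latest_time.
    """
    paths = glob.glob(pattern)
    if not paths:
        raise FileNotFoundError(f"no checkpoint matches {pattern!r}")
    return max(paths, key=os.path.getmtime)

# Illustrative patterns, mirroring the ones printed in the notebook output:
# VAE_MODEL = latest_checkpoint(".../models/best_vae_*_z24_*.pth")
# LSTM_MODEL = latest_checkpoint(".../models/best_lstm_*_z24_*.pth")
```

Using `max(paths, key=os.path.getmtime)` keeps the behaviour of the notebook loop while avoiding the manual `latest_path` / `latest_time` bookkeeping.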
