Tohoku Dataset¶

The second dataset used in this study is contributed by students at Tohoku University.
Minimal Example¶

# -------------------------------------------------------------------------
# STEP-1: LOAD LIBRARIES
# Base libraries
import os
from pathlib import Path

# Database library
import duckdb

# Custom osbad library for anomaly detection
import osbad.config as bconf
from osbad.database import BenchDB

# -------------------------------------------------------------------------
# STEP-2: LOAD CELL INVENTORY FOR CELL_INDEX

# Path to the DuckDB instance:
# "train_dataset_severson.db"
db_filepath = (
   Path.cwd()
   .parent
   .joinpath("database","train_dataset_severson.db"))
print(db_filepath)

# Create a DuckDB connection
con = duckdb.connect(
   db_filepath,
   read_only=True)

# Load all training dataset from duckdb
df_duckdb = con.execute(
   "SELECT * FROM df_train_dataset_sv").fetchdf()

unique_cell_index_train = df_duckdb["cell_index"].unique()
print(unique_cell_index_train)

# Get the cell-ID from unique_cell_index_train
selected_cell_label = "2017-05-12_5_4C-70per_3C_CH17"

# Create a subfolder to store fig output
# corresponding to each cell-index
selected_cell_artifacts_dir = bconf.artifacts_output_dir(
   selected_cell_label)

# -------------------------------------------------------------------------
# STEP-3: LOAD BENCHMARKING DATASET

# Import the BenchDB class
# Load only the dataset based on the selected cell
benchdb = BenchDB(
   db_filepath,
   selected_cell_label)

# load the benchmarking dataset
df_selected_cell = benchdb.load_benchmark_dataset(
   dataset_type="train")

if df_selected_cell is not None:

   filter_col = [
      "cell_index",
      "cycle_index",
      "discharge_capacity",
      "voltage"]

   # Drop true labels from the benchmarking dataset
   # and filter for selected columns only
   df_selected_cell_without_labels = benchdb.drop_labels(
      df_selected_cell,
      filter_col)

   # Extract true outliers cycle index from benchmarking dataset
   true_outlier_cycle_index = benchdb.get_true_outlier_cycle_index(
      df_selected_cell)
   print(f"True outlier cycle index:")
   print(true_outlier_cycle_index)

# -------------------------------------------------------------------------
# STEP-4: PLOT CYCLING DATASET

# Plot cell data with true anomalies
# If the true outlier cycle index is not known,
# cycling data will be plotted without labels
benchdb.plot_cycle_data(
   df_selected_cell_without_labels,
   true_outlier_cycle_index)
cell cycling dataset from ``2017-05-12_5_4C-70per_3C_CH17``