# Get in touch or send us a question? (CONTACT)

# Climate forecast using LSTM

import matplotlib.pyplot as plt

import pandas as pd

import keras

# Human-readable plot titles, one per entry of `feature_keys` (same order).
# NOTE(review): "Airtight" is paired with the "rho (g/m**3)" column, which
# looks like an air-density measurement -- confirm the intended label.
titles = [
    "Pressure",
    "Temperature",
    "Temperature in Kelvin",
    "Temperature (dew point)",
    "Relative Humidity",
    "Saturation vapor pressure",
    "Vapor pressure",
    "Vapor pressure deficit",
    "Specific humidity",
    "Water vapor concentration",
    "Airtight",
    "Wind speed",
    "Maximum wind speed",
    "Wind direction in degrees",
]

# Column names of the measurements in the climate CSV, in plotting order.
# Positions in this list are used later to pick the model's input features.
feature_keys = [
    "p (mbar)",
    "T (degC)",
    "Tpot (K)",
    "Tdew (degC)",
    "rh (%)",
    "VPmax (mbar)",
    "VPact (mbar)",
    "VPdef (mbar)",
    "sh (g/kg)",
    "H2OC (mmol/mol)",
    "rho (g/m**3)",
    "wv (m/s)",
    "max. wv (m/s)",
    "wd (deg)",
]

# Line colours for the raw-data subplots; cycled with a modulo when there
# are more features than colours.
colors = [
    "blue",
    "orange",
    "green",
    "red",
    "purple",
    "brown",
    "pink",
    "gray",
    "olive",
    "cyan",
]

# Name of the CSV column that holds each row's timestamp.
date_time_key = "Date Time"

def show_raw_visualization(data):
    """Plot every raw feature column of ``data`` against its timestamps.

    One subplot is drawn per entry of ``feature_keys`` on a two-column grid.
    Each subplot is titled with the matching entry of ``titles`` plus the
    column name, and coloured from ``colors`` (cycled when there are more
    features than colours).

    The figure is built and laid out but not displayed; call ``plt.show()``
    afterwards to see it.

    Args:
        data: Table (e.g. a pandas DataFrame) containing the
            ``date_time_key`` column and every column in ``feature_keys``.
    """
    time_data = data[date_time_key]
    # Two plots per row; round up so an odd number of features still fits.
    n_rows = (len(feature_keys) + 1) // 2
    _, axes = plt.subplots(
        nrows=n_rows,
        ncols=2,
        figsize=(15, 20),
        dpi=80,
        facecolor="w",
        edgecolor="k",
        squeeze=False,  # keep `axes` 2-D even when there is a single row
    )
    for i, key in enumerate(feature_keys):
        # Re-index a copy by timestamp so the x-axis shows dates without
        # altering the column object held by the caller's table.
        series = data[key].copy()
        series.index = time_data
        ax = series.plot(
            ax=axes[i // 2, i % 2],
            color=colors[i % len(colors)],
            title="{} – {}".format(titles[i], key),
            rot=25,
        )
        ax.legend([titles[i]])
    plt.tight_layout()

# Load the raw climate table and build the per-feature overview figure.
df = pd.read_csv("data/timeseries/climate.csv")
show_raw_visualization(df)
# Uncomment to display the overview figure interactively.
# plt.show()

# Fraction of rows, counted from the start of the table, used for training;
# the remaining rows are used for validation.
split_fraction = 0.715
train_split = int(split_fraction * df.shape[0])

# Windowing parameters, all expressed in table rows.
# NOTE(review): if rows are 10 minutes apart, `step` gives hourly samples,
# `past` covers 5 days and `future` is 12 hours ahead -- confirm against data.
step = 6  # keep every `step`-th row inside an input window
past = 720  # rows of history spanned by one input window
future = 72  # rows between the end of a window and its target value

# Training hyperparameters.
learning_rate = 0.001
batch_size = 256
epochs = 10

def normalize(data, train_split):
    """Standardize each column using statistics from the training rows only.

    The mean and standard deviation are computed over ``data[:train_split]``
    along axis 0 and then applied to every row, so validation rows do not
    influence the scaling.

    Args:
        data: 2-D array-like of shape (rows, features) that supports slicing,
            ``mean(axis=0)`` and ``std(axis=0)`` (e.g. a NumPy array).
        train_split: Number of leading rows that make up the training set.

    Returns:
        An array of the same shape as ``data`` holding ``(data - mean) / std``.
        A column whose training rows are constant has zero standard deviation
        and therefore yields non-finite values.
    """
    data_mean = data[:train_split].mean(axis=0)
    data_std = data[:train_split].std(axis=0)
    return (data - data_mean) / data_std

# Model inputs: a subset of the feature columns, chosen by position in
# `feature_keys`.
selected_features = [feature_keys[i] for i in [0, 1, 5, 7, 8, 10, 11]]

# Standardize with training-row statistics. Only the raw values are passed
# on, so the resulting frame has integer row and column labels (0..n-1).
features = normalize(df[selected_features].values, train_split)
features = pd.DataFrame(features)

# `.loc` slicing is label-based and inclusive, hence the `- 1`.
train_data = features.loc[0:train_split - 1]
val_data = features.loc[train_split:]

# The target for a window starting at row i is the value `past + future`
# rows later, so the label series is the feature table shifted by that much.
start = past + future
end = start + train_split

x_train = train_data[list(range(len(selected_features)))].values
# Column 1 corresponds to selected_features[1], the prediction target.
y_train = features.iloc[start:end][[1]]

# Number of timesteps per window once every `step`-th row is kept.
sequence_length = past // step

# Batched (window, target) pairs for training; each window keeps every
# `step`-th row and is `sequence_length` timesteps long.
dataset_train = keras.preprocessing.timeseries_dataset_from_array(
    x_train,
    y_train,
    sequence_length=sequence_length,
    sampling_rate=step,
    batch_size=batch_size,
)

# Drop the trailing `past + future` validation rows from the inputs and
# start the labels `past + future` rows after the split point.
x_end = len(val_data) - past - future
label_start = train_split + past + future

x_val = val_data.iloc[:x_end][list(range(7))].values
y_val = features.iloc[label_start:][[1]]

dataset_val = keras.preprocessing.timeseries_dataset_from_array(
    x_val,
    y_val,
    sequence_length=sequence_length,
    sampling_rate=step,
    batch_size=batch_size,
)

# Pull a single batch to report the tensor shapes; `inputs` is reused below
# to size the model's input layer.
for batch in dataset_train.take(1):
    inputs, targets = batch

print("Input shape:", inputs.numpy().shape)
print("Target shape:", targets.numpy().shape)

# Single-layer LSTM regressor. The input layer takes its per-sample shape
# from dimensions 1 and 2 of the sample batch fetched earlier.
inputs = keras.layers.Input(shape=(inputs.shape[1], inputs.shape[2]))
lstm_out = keras.layers.LSTM(32)(inputs)
outputs = keras.layers.Dense(1)(lstm_out)

model = keras.Model(inputs=inputs, outputs=outputs)
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
    loss="mse",
)
model.summary()

# File that receives the weights of the best epoch (lowest validation loss).
path_checkpoint = "model_checkpoint.weights.h5"

# Stop training once validation loss has not improved for 5 epochs in a row.
es_callback = keras.callbacks.EarlyStopping(
    monitor="val_loss", min_delta=0, patience=5
)

# Save weights only, and only when validation loss improves.
modelckpt_callback = keras.callbacks.ModelCheckpoint(
    monitor="val_loss",
    filepath=path_checkpoint,
    verbose=1,
    save_weights_only=True,
    save_best_only=True,
)

history = model.fit(
    dataset_train,
    epochs=epochs,
    validation_data=dataset_val,
    callbacks=[es_callback, modelckpt_callback],
)

def visualize_loss(history, title):
    """Plot training and validation loss per epoch and display the figure.

    Args:
        history: Object whose ``history`` mapping holds the per-epoch
            ``"loss"`` and ``"val_loss"`` sequences (such as the value
            returned by ``model.fit``).
        title: Title shown above the plot.
    """
    loss = history.history["loss"]
    val_loss = history.history["val_loss"]
    # Named to avoid shadowing the module-level `epochs` hyperparameter.
    epoch_indices = range(len(loss))
    plt.figure()
    plt.plot(epoch_indices, loss, "b", label="Training loss")
    plt.plot(epoch_indices, val_loss, "r", label="Validation loss")
    plt.title(title)
    plt.xlabel("Epochs")
    plt.ylabel("Loss")
    plt.legend()
    plt.show()

# Show how training and validation loss evolved over the completed epochs.
visualize_loss(history, "Training and Validation Loss")

# Source: adapted from the Keras example "Timeseries forecasting for weather prediction", https://keras.io/