Memory leaks when using GPU

I haven't used the GPU implementation for over a year now due to constant issues (I call tf.config.set_visible_devices([], 'GPU') to restrict TensorFlow to the CPU).
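For context, a minimal sketch of that CPU-only workaround (it has to run before anything initializes the GPUs, otherwise TensorFlow raises a RuntimeError):

import tensorflow as tf

# Hide every GPU from the runtime so all ops are placed on the CPU.
tf.config.set_visible_devices([], 'GPU')

# Sanity check: should print an empty list.
print(tf.config.get_visible_devices('GPU'))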

I have also had a couple of issues with model convergence on the GPU, but this memory-leak issue seems more prominent and is possibly unrelated.

Here is an example of code that causes a memory leak when using the GPU. (I cannot link the dataset, but it is called "Text classification documentation", by TANISHQ DUBLISH on Kaggle.)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

# Load the Kaggle CSV and preview it
df = pd.read_csv('df_file.csv')
df.head()

# 70 / 15 / 15 train / validation / test split
train_df = df.sample(frac=0.7, random_state=42)
val_df = df.drop(train_df.index).sample(frac=0.5, random_state=42)
test_df = df.drop(train_df.index).drop(val_df.index)

# Batched, prefetched tf.data pipelines for each split
train_dataset = tf.data.Dataset.from_tensor_slices((train_df['Text'].values, train_df['Label'].values)).batch(32).prefetch(tf.data.AUTOTUNE)
val_dataset = tf.data.Dataset.from_tensor_slices((val_df['Text'].values, val_df['Label'].values)).batch(32).prefetch(tf.data.AUTOTUNE)
test_dataset = tf.data.Dataset.from_tensor_slices((test_df['Text'].values, test_df['Label'].values)).batch(32).prefetch(tf.data.AUTOTUNE)

# Map raw strings to integer token sequences of length 1000
text_vectorizer = tf.keras.layers.TextVectorization(max_tokens=100_000, output_mode='int', output_sequence_length=1000, pad_to_max_tokens=True)
text_vectorizer.adapt(train_df['Text'].values)

embedding = tf.keras.layers.Embedding(input_dim=len(text_vectorizer.get_vocabulary()), output_dim=128, input_length=1000)

# String input -> vectorize -> embed -> LSTM -> 5-way softmax
inputs = tf.keras.layers.Input(shape=[], dtype=tf.string)
x = text_vectorizer(inputs)
x = embedding(x)
x = tf.keras.layers.LSTM(64)(x)
outputs = tf.keras.layers.Dense(5, activation='softmax')(x)

model_2 = tf.keras.Model(inputs, outputs, name='model_2_lstm')

# Compile and train with early stopping, checkpointing, and LR reduction on plateau
model_2.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(), optimizer=tf.keras.optimizers.legacy.Adam(), metrics=['accuracy'])
model_2_history = model_2.fit(train_dataset, epochs=50, validation_data=val_dataset, callbacks=[
    tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True),
    tf.keras.callbacks.ModelCheckpoint(model_2.name, save_best_only=True),
    tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', patience=5, verbose=1)
])
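To make the growth visible epoch to epoch, here is a minimal sketch of a logging callback (assuming a TF build, 2.5+, that exposes tf.config.experimental.get_memory_info; the host-RSS line is Linux-specific):

import resource

import tensorflow as tf

class MemoryLogger(tf.keras.callbacks.Callback):
    """Print GPU allocator stats and peak host RSS after every epoch."""

    def on_epoch_end(self, epoch, logs=None):
        # Bytes currently and ever allocated on the first visible GPU.
        gpu = tf.config.experimental.get_memory_info('GPU:0')
        # Peak resident set size of the process; ru_maxrss is in KiB on Linux.
        rss_mib = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024
        print(f"epoch {epoch}: GPU current={gpu['current'] / 2**20:.1f} MiB, "
              f"GPU peak={gpu['peak'] / 2**20:.1f} MiB, host peak RSS={rss_mib:.1f} MiB")

Appending MemoryLogger() to the callbacks list above shows whether the growth is in device memory or in host memory.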