I was also having this problem on my 2019 MacBook Pro and I managed to solve the problem like this. You can circumvent this problem by creating your own implementation of Adam in keras and use that.
I have made a very rough and basic implementation while referencing the research paper on Adam (https://arxiv.org/abs/1412.6980) and Creating a Custom Optimiser in Tensorflow (https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/Optimizer#creating_a_custom_optimizer_2).
Please note that I have not implemented _resource_apply_sparse or any of Adam’s fancier bells and whistles (such as amsgrad). This is a simple and basic implementation of the optimiser as described in the paper I referenced above.
IMPORTANT NOTE:
The code requires eager mode, because it calls self.iterations.numpy() inside the update step.
To enable this, add the line tf.config.run_functions_eagerly(True) at the top of the code.
Optimiser code:
import tensorflow as tf

# Force eager execution globally: the optimizer below calls
# self.iterations.numpy(), which only works outside graph mode.
# NOTE(review): this disables graph tracing for the whole program and
# will slow training; a graph-safe step counter would remove the need.
tf.config.run_functions_eagerly(True)
class CustomAdam(tf.keras.optimizers.Optimizer):
    """Minimal Adam optimizer (Kingma & Ba, https://arxiv.org/abs/1412.6980).

    A plain implementation of Algorithm 1 from the paper on top of the
    legacy ``tf.keras.optimizers.Optimizer`` base class. Sparse gradients
    and AMSGrad are intentionally not implemented.

    Args:
        learning_rate: Step size (alpha in the paper). ``lr=`` is accepted
            as a legacy alias via ``**kwargs``.
        beta1: Exponential decay rate for the first-moment estimate.
        beta2: Exponential decay rate for the second-moment estimate.
        epsilon: Small constant added to the denominator for stability.
        name: Optional name for the optimizer's operations.
    """

    def __init__(self, learning_rate=0.001, beta1=0.9, beta2=0.999,
                 epsilon=1e-8, name="CustomAdam", **kwargs):
        super().__init__(name, **kwargs)
        # Honor the legacy ``lr=`` keyword if the caller used it.
        self._set_hyper("learning_rate", kwargs.get("lr", learning_rate))
        self._set_hyper("decay", self._initial_decay)
        # Hyper/slot names kept as "beta_v"/"beta_s" for backward
        # compatibility with the original version of this class.
        self._set_hyper("beta_v", beta1)     # beta1 (first-moment decay)
        self._set_hyper("beta_s", beta2)     # beta2 (second-moment decay)
        self._set_hyper("epsilon", epsilon)

    def _create_slots(self, var_list):
        """Create the two per-variable moment accumulators (m and v).

        The bias-corrected estimates are ordinary tensors recomputed each
        step, so they need no slots; only the running moments are state.
        """
        for var in var_list:
            self.add_slot(var, "beta_v")  # m: first-moment estimate
            self.add_slot(var, "beta_s")  # v: second-moment estimate

    @tf.function
    def _resource_apply_dense(self, grad, var):
        """Apply one Adam step to a dense variable.

        The step count ``t`` is derived from ``self.iterations`` with pure
        TF ops (``tf.cast``/``tf.pow``) rather than ``.numpy()``, so this
        works in graph mode — no global eager execution required.
        """
        var_dtype = var.dtype.base_dtype
        lr_t = self._decayed_lr(var_dtype)  # learning rate after decay
        beta1 = self._get_hyper("beta_v", var_dtype)
        beta2 = self._get_hyper("beta_s", var_dtype)
        epsilon = self._get_hyper("epsilon", var_dtype)

        # Update biased moment estimates:
        #   m_t = beta1 * m_{t-1} + (1 - beta1) * g
        #   v_t = beta2 * v_{t-1} + (1 - beta2) * g^2
        m = self.get_slot(var, "beta_v")
        v = self.get_slot(var, "beta_s")
        m.assign(beta1 * m + (1.0 - beta1) * grad)
        v.assign(beta2 * v + (1.0 - beta2) * tf.square(grad))

        # Bias correction. ``iterations`` is 0-based before the first
        # apply, so the paper's t is iterations + 1.
        t = tf.cast(self.iterations + 1, var_dtype)
        m_hat = m / (1.0 - tf.pow(beta1, t))
        v_hat = v / (1.0 - tf.pow(beta2, t))

        # theta_t = theta_{t-1} - alpha * m_hat / (sqrt(v_hat) + eps)
        var.assign_sub(lr_t * m_hat / (tf.sqrt(v_hat) + epsilon))

    def _resource_apply_sparse(self, grad, var):
        """Sparse gradients are out of scope for this minimal version."""
        raise NotImplementedError

    def get_config(self):
        """Return the serializable configuration (for model saving)."""
        base_config = super().get_config()
        return {
            **base_config,
            "learning_rate": self._serialize_hyperparameter("learning_rate"),
            "decay": self._serialize_hyperparameter("decay"),
            "beta_v": self._serialize_hyperparameter("beta_v"),
            "beta_s": self._serialize_hyperparameter("beta_s"),
            "epsilon": self._serialize_hyperparameter("epsilon"),
        }
Example usage:
model.compile(optimizer=CustomAdam(),
              loss='mse')
model.fit(X, Y, epochs=10)
This code reproduces the crash ```In [2]: import tensorflow as tf ...: ...: mnist = tf.keras.datasets.mnist ...: ...: (x_train, y_train), (x_test, y_test) = mnist.load_data() ...: x_train, x_test = x_train / 255.0, x_test / 255.0 ...: ...: model = tf.keras.models.Sequential([ ...: tf.keras.layers.Flatten(input_shape=(28, 28)), ...: tf.keras.layers.Dense(128, activation='relu'), ...: tf.keras.layers.Dropout(0.2), ...: tf.keras.layers.Dense(10) ...: ]) ...: ...: predictions = model(x_train[:1]).numpy() ...: tf.nn.softmax(predictions).numpy() ...: ...: loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True ...: ) ...: ...: loss_fn(y_train[:1], predictions).numpy() ...: ...: model.compile(optimizer = 'adam', loss = loss_fn) ...: model.fit(x_train, y_train, epochs=100) Epoch 1/100 2021-10-10 10:50:53.503460: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled. 2021-10-10 10:50:53.527 python[25080:3485800] -[MPSGraph adamUpdateWithLearningRateTensor:beta1Tensor:beta2Tensor:epsilonTensor:beta1PowerTensor:beta2PowerTensor:valuesTensor:momentumTensor:velocityTensor:gradientTensor:name:]: unrecognized selector sent to instance 0x6000037975a0 zsh: segmentation fault ipython
This code reproduces the crash ...
I have the same problem here. Python: 3.0.10 macOS: monterey 12.0.1 macbook pro 2018 with Radeon Pro 560X 4 GB
But before I upgrade to Monterey, The scripts works just fine with Big Sur.