InvalidArgumentError on Tensorflow

Question

Created Dec ’22

Replies 2

Boosts 0

Participants 2

I was trying to train a TensorFlow model on my M1 MacBook Pro when I got an InvalidArgumentError, as follows: InvalidArgumentError: Cannot assign a device for operation model/conv2d_12/Conv2D/ReadVariableOp: Could not satisfy explicit device specification '' because the node {{colocation_node model/conv2d_12/Conv2D/ReadVariableOp}} was colocated with a group of nodes that required incompatible device '/job:localhost/replica:0/task:0/device:GPU:0'. All available devices [/job:localhost/replica:0/task:0/device:CPU:0, /job:localhost/replica:0/task:0/device:GPU:0]. Can anyone help with this problem? Many thanks!

Boost

Answer 1

Frameworks Engineer OP

Apple

Dec ’22

Hi @xuliu

Can you let us know which versions of tensorflow-metal, tensorflow-macos and the OS this was seen on? If you have a small, simple script that reproduces the problem, that would help as well. The reason for an error message like the one you are seeing would be that some variant of the ReadVariableOp is not enabled on the GPU. This could be, for example, because the op is requesting an unsupported data type.

0

Answer 2

xuliu OP

Dec ’22

Hi, the versions of tensorflow-metal, tensorflow-macos and the OS are 0.6.0, 2.10.0 and 13.0.1 (22A400) respectively. The code is below:

import os
import warnings

import numpy as np
import tensorflow as tf
import keras
from keras.models import Model
from keras.layers import Input, Dense, Dropout, Activation
from keras.layers import Conv2D, GlobalAveragePooling2D
from keras.layers import BatchNormalization, Add
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras.models import load_model

warnings.filterwarnings("ignore")

# dataset files
train_files = ["esc_melsp_all_train_raw.npz"]
test_file = "esc_melsp_all_test.npz"

# load training dataset
# Every training file is read first and the arrays are stacked along the
# sample axis, so the sizes below are derived from the data itself instead
# of being read from variables that do not exist yet.
x_parts = []
y_parts = []
for train_file in train_files:
    # the context manager closes the .npz archive once the arrays are read
    with np.load(train_file) as data:
        x_parts.append(data["x"])
        y_parts.append(data["y"])
x_train = np.concatenate(x_parts)
y_train = np.concatenate(y_parts)

# load test dataset
with np.load(test_file) as test_data:
    x_test = test_data["x"]
    y_test = test_data["y"]

train_num = len(x_train)
test_num = len(x_test)
print(train_num)
print(test_num)

# NOTE(review): assumes the stored "x" arrays are (samples, freq, time), as
# implied by the reshape below — confirm against the files.
_, freq, time = x_train.shape

# redefine target data into one hot vector
classes = 50
y_train = keras.utils.to_categorical(y_train, classes)
y_test = keras.utils.to_categorical(y_test, classes)

# reshape training dataset (append a trailing channel axis of size 1)
x_train = x_train.reshape(train_num, freq, time, 1)
x_test = x_test.reshape(test_num, freq, time, 1)

def cba(inputs, filters, kernel_size, strides):
    """Apply Conv2D -> BatchNormalization -> ReLU activation to *inputs*.

    Args:
        inputs: Tensor fed to the convolution.
        filters: Number of convolution filters.
        kernel_size: Convolution kernel size, passed through to ``Conv2D``.
        strides: Convolution strides, passed through to ``Conv2D``.

    Returns:
        The output tensor of the ReLU activation.
    """
    x = Conv2D(filters, kernel_size=kernel_size, strides=strides, padding='same')(inputs)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
    return x

# define CNN
inputs = Input(shape=(x_train.shape[1:]))

# Four parallel branches that differ only in kernel length (8, 16, 32, 64).
# Each branch alternates a (1, k) kernel with a (k, 1) kernel, first with
# 32 filters and then with 64. Layers are created in the same order as the
# unrolled version (branch by branch).
branches = []
for kernel in (8, 16, 32, 64):
    branch = cba(inputs, filters=32, kernel_size=(1, kernel), strides=(1, 2))
    branch = cba(branch, filters=32, kernel_size=(kernel, 1), strides=(2, 1))
    branch = cba(branch, filters=64, kernel_size=(1, kernel), strides=(1, 2))
    branch = cba(branch, filters=64, kernel_size=(kernel, 1), strides=(2, 1))
    branches.append(branch)

# merge the branches by element-wise addition
x = Add()(branches)

x = cba(x, filters=128, kernel_size=(1, 16), strides=(1, 2))
x = cba(x, filters=128, kernel_size=(16, 1), strides=(2, 1))

# classification head: global pooling -> dense -> softmax over `classes`
x = GlobalAveragePooling2D()(x)
x = Dense(classes)(x)
x = Activation("softmax")(x)

model = Model(inputs, x)
model.summary()

# initiate Adam optimizer
# `learning_rate` is the documented argument name; `lr` is a deprecated alias.
# NOTE(review): `decay` is a legacy keyword; newer Keras optimizers may
# reject it — confirm against the installed TensorFlow version.
opt = tf.optimizers.Adam(learning_rate=0.0001, decay=1e-6, amsgrad=True)

# Let's train the model using Adam with amsgrad
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

# directory for model checkpoints
model_dir = "./models"
# exist_ok avoids the check-then-create race of exists() + mkdir()
os.makedirs(model_dir, exist_ok=True)

# early stopping and model checkpoint
es_cb = EarlyStopping(monitor='val_loss', patience=10,
                      restore_best_weights=True, verbose=1, mode='min')
chkpt = os.path.join(model_dir, 'model.hdf5')
cp_cb = ModelCheckpoint(filepath=chkpt, monitor='val_loss', verbose=1,
                        save_best_only=True, mode='min')
# reduce the learning rate when the validation loss stops improving
reduce_lr = ReduceLROnPlateau(monitor="val_loss", factor=0.6, patience=3,
                              verbose=1, mode="min")

between class data generator

class MixupGenerator():
    """Endless generator of mixed-up training batches.

    Each batch draws ``2 * batch_size`` samples, splits them into two halves
    and blends the halves (inputs and targets alike) with per-sample weights
    sampled from ``Beta(alpha, alpha)``.

    Args:
        x_train: Array of training inputs; the first axis indexes samples.
        y_train: Array of training targets; the first axis indexes samples.
        batch_size: Number of blended samples per yielded batch.
        alpha: Parameter of the symmetric Beta distribution for the weights.
        shuffle: Whether to shuffle the sample order on every pass.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1, or if there are
            fewer than ``2 * batch_size`` samples (the generator could then
            never produce a batch and would loop forever).
    """

    def __init__(self, x_train, y_train, batch_size=16, alpha=0.2, shuffle=True):
        if batch_size < 1:
            raise ValueError("batch_size must be at least 1")
        if len(x_train) < batch_size * 2:
            raise ValueError(
                "need at least 2 * batch_size samples to build a mixed batch")
        self.x_train = x_train
        self.y_train = y_train
        self.batch_size = batch_size
        self.alpha = alpha
        self.shuffle = shuffle
        self.sample_num = len(x_train)

    def __call__(self):
        """Yield ``(x, y)`` batches forever, one pass over the data at a time."""
        pair_size = self.batch_size * 2  # samples consumed per yielded batch
        while True:
            indexes = self.__get_exploration_order()
            itr_num = len(indexes) // pair_size

            for i in range(itr_num):
                batch_ids = indexes[i * pair_size:(i + 1) * pair_size]
                yield self.__data_generation(batch_ids)

    def __get_exploration_order(self):
        """Return the sample indexes for one pass, shuffled if requested."""
        indexes = np.arange(self.sample_num)

        if self.shuffle:
            np.random.shuffle(indexes)

        return indexes

    def __data_generation(self, batch_ids):
        """Blend the first and second half of *batch_ids* into one batch."""
        first_ids = batch_ids[:self.batch_size]
        second_ids = batch_ids[self.batch_size:]
        x1 = self.x_train[first_ids]
        x2 = self.x_train[second_ids]
        y1 = self.y_train[first_ids]
        y2 = self.y_train[second_ids]

        # one mixing weight per sample, reshaped so that it broadcasts over
        # all non-sample axes of the inputs and of the targets
        lam = np.random.beta(self.alpha, self.alpha, self.batch_size)
        x_l = lam.reshape((self.batch_size,) + (1,) * (x1.ndim - 1))
        y_l = lam.reshape((self.batch_size,) + (1,) * (y1.ndim - 1))

        x = x1 * x_l + x2 * (1 - x_l)
        y = y1 * y_l + y2 * (1 - y_l)

        return x, y

# train model
batch_size = 32
epochs = 500

# Pass batch_size explicitly: the generator otherwise falls back to its own
# default, which disagrees with the value used for steps_per_epoch below.
training_generator = MixupGenerator(x_train, y_train, batch_size=batch_size)()

# Model.fit accepts Python generators directly; fit_generator is deprecated.
# Shuffling is done inside the generator, so no `shuffle` argument is passed.
model.fit(training_generator,
          steps_per_epoch=x_train.shape[0] // batch_size,
          validation_data=(x_test, y_test),
          epochs=epochs,
          verbose=1,
          callbacks=[es_cb, cp_cb, reduce_lr])

0