-[_MTLCommandBuffer addCompletedHandler:]:867:

failed assertion `Completed handler provided after commit call'.

How can I clear this error? When I run on the CPU I get a storage error, so I tried the GPU.

Partial code:

import math

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset


# PositionalEncoding
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len, dropout_prob=0.1):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout_prob)

        # Create positional encoding matrix
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))

        # Pad div_term with zeros if necessary
        div_term_padded = torch.zeros(d_model)
        div_term_padded[:div_term.size(0)] = div_term

        pe[:, 0::2] = torch.sin(position * div_term_padded[0::2])
        pe[:, 1::2] = torch.cos(position * div_term_padded[1::2])
        pe = pe.unsqueeze(0).transpose(0, 1)  # shape: (max_len, 1, d_model)
        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + self.pe[:x.size(0), :]
        return self.dropout(x)
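
For reference, a quick shape check of the encoder above (the sizes here are hypothetical, not taken from the post): the module expects input shaped (seq_len, batch, d_model), since pe is stored as (max_len, 1, d_model) and broadcast over the batch dimension.

# Hypothetical sizes, purely to verify shapes
enc = PositionalEncoding(d_model=16, max_len=50, dropout_prob=0.1)
x = torch.zeros(50, 4, 16)   # (seq_len, batch, d_model)
print(enc(x).shape)          # torch.Size([50, 4, 16])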

# TransformerModel class
class TransformerModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, d_model, num_heads,
                 dropout_prob, output_size, device, max_len):
        super(TransformerModel, self).__init__()
        self.device = device
        self.hidden_size = hidden_size
        self.d_model = d_model
        self.num_heads = num_heads

        # self.embedding = nn.Embedding(input_size, d_model).to(device)
        self.embedding = nn.Linear(input_size, d_model).to(device)

        self.pos_encoder = PositionalEncoding(d_model, max_len, dropout_prob).to(device)

        self.transformer_encoder_layer = nn.TransformerEncoderLayer(d_model, num_heads, hidden_size, dropout_prob).to(device)
        self.transformer_encoder = nn.TransformerEncoder(self.transformer_encoder_layer, num_layers).to(device)
        self.decoder = nn.Linear(d_model, output_size).to(device)
        self.to(device)  # Ensure the model is on the correct device

    def forward(self, x):
        # x = x.long()
        x = x.transpose(0, 1)  # Transpose the input tensor to match the expected shape for the transformer
        x = x.squeeze()        # Remove the extra dimension from the input tensor

        x = self.embedding(x)            # Apply the input embedding
        x = self.pos_encoder(x)          # Add positional encoding
        x = self.transformer_encoder(x)  # Apply the transformer encoder
        x = self.decoder(x[:, -1, :])    # Decode the last time step's output to get the final prediction

        return x

# Train transformer model
def train_transformer_model(train_X_scaled, train_y, input_size, d_model, hidden_size,
                            num_layers, output_size, learning_rate, num_epochs, num_heads,
                            dropout_prob, device, n_accumulation_steps=32):
    train_X_tensor = torch.from_numpy(train_X_scaled).float().to(device)
    train_y_tensor = torch.from_numpy(train_y).float().unsqueeze(1).to(device)

    # Create the dataset and DataLoader
    train_data = TensorDataset(train_X_tensor, train_y_tensor)
    train_loader = DataLoader(train_data, batch_size=8, shuffle=True)

    # Compute the maximum length of the input sequences
    max_len = train_X_tensor.size(1)

    # Create the model
    model = TransformerModel(input_size, hidden_size, num_layers, d_model, num_heads,
                             dropout_prob, output_size, device, max_len).to(device)
    q = 0.5
    criterion = lambda y_pred, y_true: quantile_loss(q, y_true, y_pred)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    print(f"Transformer inputs shape: {train_X_tensor.shape}, targets shape: {train_y_tensor.shape}")

    for epoch in range(1, num_epochs + 1):
        model.train()
        print(f"transformer Epoch {epoch}/{num_epochs}")

        for i, (batch_X, batch_y) in enumerate(train_loader):
            batch_X = batch_X.to(device)
            print("transformer batch_X shape:", batch_X.shape)

            batch_y = batch_y.to(device)
            print("transformer batch_Y shape:", batch_y.shape)

            batch_X = batch_X.transpose(0, 1)
            train_pred = model(batch_X.squeeze(0))
            print("train_pred =", train_pred)
            loss = criterion(train_pred, batch_y)
            loss.backward()  # gradients accumulate across batches until the next optimizer step

            # Gradient accumulation
            if (i + 1) % n_accumulation_steps == 0:
                optimizer.step()
                optimizer.zero_grad()
                print(f"transformer Epoch {epoch}/{num_epochs}, Step {i+1}/{len(train_loader)}, Loss: {loss.item():.6f}")

    return model
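
Two things the partial code references but does not include are the device object that gets passed around and the quantile_loss used by the criterion. A minimal sketch of both, assuming the GPU in question is the PyTorch MPS backend and that quantile_loss is the usual pinball loss (both are assumptions, not the poster's actual definitions):

# Assumed device selection: Apple GPU via the PyTorch MPS backend, with CPU fallback.
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")

# Assumed pinball (quantile) loss matching the call quantile_loss(q, y_true, y_pred) above.
def quantile_loss(q, y_true, y_pred):
    errors = y_true - y_pred
    return torch.mean(torch.max(q * errors, (q - 1) * errors))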

Hi sugumar0107,

From the Metal API perspective, the sequence of steps that triggers this assertion is:

  1. Create a command buffer
  2. Commit the command buffer
  3. Add a completed handler to said command buffer

It is not possible to add a completed handler to a command buffer after it has been committed.

To prevent this assertion, modify your code so that the steps happen in the following order (see the sketch after the list):

  1. Create a command buffer
  2. Add a completed handler to said command buffer
  3. Commit the command buffer
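
For illustration, a minimal sketch of that ordering driven from Python through the pyobjc Metal bindings. This assumes the pyobjc-framework-Metal package is installed; selector names follow pyobjc's colon-to-underscore mapping, and passing a plain Python callable as the completion handler relies on pyobjc's block bridging. Treat it as a sketch of the ordering, not a drop-in fix for the assertion surfacing from PyTorch's MPS backend.

import Metal

device = Metal.MTLCreateSystemDefaultDevice()      # GPU device
queue = device.newCommandQueue()
command_buffer = queue.commandBuffer()             # 1. Create a command buffer

def on_completed(buffer):
    # Runs once the GPU has finished executing this command buffer.
    print("command buffer completed, status:", buffer.status())

command_buffer.addCompletedHandler_(on_completed)  # 2. Add the completed handler BEFORE commit
command_buffer.commit()                            # 3. Commit last
command_buffer.waitUntilCompleted()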