-[_MTLCommandBuffer addCompletedHandler:]:867:

failed assertion `Completed handler provided after commit call'.

How can I clear this error? When I run on the CPU I get a storage error, so I tried running on the GPU (MPS) instead.

Partial code:

# Imports used by the snippets below
import math

import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader


# PositionalEncoding
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len, dropout_prob=0.1):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout_prob)

        # Create positional encoding matrix
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))

        # Pad div_term with zeros if necessary
        div_term_padded = torch.zeros(d_model)
        div_term_padded[:div_term.size(0)] = div_term

        pe[:, 0::2] = torch.sin(position * div_term_padded[0::2])
        pe[:, 1::2] = torch.cos(position * div_term_padded[1::2])
        pe = pe.unsqueeze(0).transpose(0, 1)  # shape: (max_len, 1, d_model)
        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + self.pe[:x.size(0), :]
        return self.dropout(x)

# TransformerModel class
class TransformerModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, d_model, num_heads,
                 dropout_prob, output_size, device, max_len):
        super(TransformerModel, self).__init__()
        self.device = device
        self.hidden_size = hidden_size
        self.d_model = d_model
        self.num_heads = num_heads

        #self.embedding = nn.Embedding(input_size, d_model).to(device)
        self.embedding = nn.Linear(input_size, d_model).to(device)

        self.pos_encoder = PositionalEncoding(d_model, max_len, dropout_prob).to(device)

        self.transformer_encoder_layer = nn.TransformerEncoderLayer(d_model, num_heads, hidden_size, dropout_prob).to(device)
        self.transformer_encoder = nn.TransformerEncoder(self.transformer_encoder_layer, num_layers).to(device)
        self.decoder = nn.Linear(d_model, output_size).to(device)
        self.to(device)  # Ensure the model is on the correct device

    def forward(self, x):
        #x = x.long()
        x = x.transpose(0, 1)  # Transpose the input to the layout the transformer encoder expects
        x = x.squeeze()        # Remove the extra singleton dimension from the input tensor

        x = self.embedding(x)            # Apply the input embedding
        x = self.pos_encoder(x)          # Add positional encoding
        x = self.transformer_encoder(x)  # Apply the transformer encoder
        x = self.decoder(x[:, -1, :])    # Decode the last time step's output to get the final prediction

        return x

# Train transformer model
def train_transformer_model(train_X_scaled, train_y, input_size, d_model, hidden_size,
                            num_layers, output_size, learning_rate, num_epochs, num_heads,
                            dropout_prob, device, n_accumulation_steps=32):
    train_X_tensor = torch.from_numpy(train_X_scaled).float().to(device)
    train_y_tensor = torch.from_numpy(train_y).float().unsqueeze(1).to(device)

    # Create the dataset and DataLoader
    train_data = TensorDataset(train_X_tensor, train_y_tensor)
    train_loader = DataLoader(train_data, batch_size=8, shuffle=True)

    # Compute the maximum length of the input sequences
    max_len = train_X_tensor.size(1)

    # Create the model
    model = TransformerModel(input_size, hidden_size, num_layers, d_model, num_heads,
                             dropout_prob, output_size, device, max_len).to(device)
    q = 0.5
    criterion = lambda y_pred, y_true: quantile_loss(q, y_true, y_pred)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    print(f"Transformer inputs shape: {train_X_tensor.shape}, targets shape: {train_y_tensor.shape}")

    for epoch in range(1, num_epochs + 1):
        model.train()
        print(f"transformer Epoch {epoch}/{num_epochs}")

        optimizer.zero_grad()
        for i, (batch_X, batch_y) in enumerate(train_loader):
            batch_X = batch_X.to(device)
            print("transformer batch_X shape:", batch_X.shape)

            batch_y = batch_y.to(device)
            print("transformer batch_Y shape:", batch_y.shape)

            batch_X = batch_X.transpose(0, 1)
            train_pred = model(batch_X.squeeze(0)).to(device)
            print("train_pred=", train_pred)
            loss = criterion(train_pred, batch_y).to(device)
            loss.backward()

            # Gradient accumulation: step and reset gradients only every n_accumulation_steps batches
            if (i + 1) % n_accumulation_steps == 0:
                optimizer.step()
                optimizer.zero_grad()
                print(f"transformer Epoch {epoch}/{num_epochs}, Step {i+1}/{len(train_loader)}, Loss: {loss.item():.6f}")

    return model

Replies

Hi sugumar0107,

From the Metal API perspective, the sequence of steps that triggers this assertion is:

  1. Create a command buffer
  2. Commit the command buffer
  3. Add a completed handler to said command buffer

It is not possible to add a completed handler to a command buffer after it has been committed.

In order to prevent this assertion, modify your code to the following sequence:

  1. Create a command buffer
  2. Add a completed handler to said command buffer
  3. Commit the command buffer
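
For illustration, here is a minimal Swift sketch of that corrected ordering. The default device, the empty command buffer, and the printed status are placeholder assumptions added for this example; only the add-handler-then-commit order reflects the steps above.

import Metal

guard let device = MTLCreateSystemDefaultDevice(),
      let queue = device.makeCommandQueue(),
      let commandBuffer = queue.makeCommandBuffer() else {
    fatalError("Metal device or command buffer unavailable")
}

// 1. Command buffer created above.
// 2. Add the completed handler BEFORE committing.
commandBuffer.addCompletedHandler { buffer in
    // Runs once the GPU has finished executing this buffer.
    print("Command buffer completed with status \(buffer.status.rawValue)")
}

// 3. Commit last. Calling addCompletedHandler after this point is what
//    raises "Completed handler provided after commit call".
commandBuffer.commit()
commandBuffer.waitUntilCompleted()

Any work you encode into the buffer goes between its creation and the commit; the handler registration simply has to happen before the commit call.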