Hello!
I have a TrackNet model that I have converted to CoreML (.mlpackage) using coremltools, and the conversion process appears to go smoothly as I get the .mlpackage file I am looking for with the weights and model.mlmodel file in the folder. However, when I drag it into Xcode, it just shows up as 4 script tags (as pictured) instead of the model "interface" that is typically expected. I initially was concerned that my model was not compatible with CoreML, but upon logging the conversions, everything seems to be converted properly.
Here is some code that may be relevant for debugging this issue. This is how I load and use the model:
# Build the network, restore the trained weights, and switch to inference mode.
device = self.device  # cpu
weights = torch.load("models/balltrackerbest.pt", map_location=device)  # balltrackerbest is the weights
model = BallTrackerNet()  # this is the model architecture which will be referenced later
model.load_state_dict(weights)
model = model.to(device).eval()  # eval() returns the module itself
Here is the BallTrackerNet() model itself:
import torch
import torch.nn as nn


class ConvBlock(nn.Module):
    """Convolution followed by ReLU and then batch normalisation.

    The three layers live in ``self.block`` (an ``nn.Sequential``) at
    indices 0, 1 and 2 respectively.
    """

    def __init__(
        self,
        in_channels,
        out_channels,
        kernel_size=3,
        pad=1,
        stride=1,
        bias=True,
    ):
        super().__init__()
        conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size,
            stride=stride,
            padding=pad,
            bias=bias,
        )
        activation = nn.ReLU()
        norm = nn.BatchNorm2d(out_channels)
        self.block = nn.Sequential(conv, activation, norm)

    def forward(self, x):
        """Run ``x`` through conv -> ReLU -> batch norm."""
        return self.block(x)


class BallTrackerNet(nn.Module):
    """Stack of 18 ``ConvBlock`` layers with pooling and upsampling.

    Three 2x max-pool stages are followed by three 2x upsample stages.
    The first block expects 9 input channels and the last block emits
    ``out_channels`` channels.
    """

    def __init__(self, out_channels=256):
        super().__init__()
        self.out_channels = out_channels

        # Down-sampling half: channel width doubles after each pool.
        self.conv1 = ConvBlock(9, 64)
        self.conv2 = ConvBlock(64, 64)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv3 = ConvBlock(64, 128)
        self.conv4 = ConvBlock(128, 128)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.conv5 = ConvBlock(128, 256)
        self.conv6 = ConvBlock(256, 256)
        self.conv7 = ConvBlock(256, 256)
        self.pool3 = nn.MaxPool2d(2, 2)

        # Lowest-resolution stage.
        self.conv8 = ConvBlock(256, 512)
        self.conv9 = ConvBlock(512, 512)
        self.conv10 = ConvBlock(512, 512)

        # Up-sampling half: channel width shrinks after each upsample.
        self.ups1 = nn.Upsample(scale_factor=2)
        self.conv11 = ConvBlock(512, 256)
        self.conv12 = ConvBlock(256, 256)
        self.conv13 = ConvBlock(256, 256)
        self.ups2 = nn.Upsample(scale_factor=2)
        self.conv14 = ConvBlock(256, 128)
        self.conv15 = ConvBlock(128, 128)
        self.ups3 = nn.Upsample(scale_factor=2)
        self.conv16 = ConvBlock(128, 64)
        self.conv17 = ConvBlock(64, 64)
        self.conv18 = ConvBlock(64, self.out_channels)

        self.softmax = nn.Softmax(dim=1)
        self._init_weights()

    def forward(self, x, testing=False):
        """Return per-position channel scores for a batch of inputs.

        Args:
            x: 4-D input whose channel dimension is 9 (required by
                ``conv1``).
            testing: when true, a softmax over dimension 1 is applied to
                the reshaped output; otherwise the raw values are returned.

        Returns:
            Tensor reshaped to ``(batch, out_channels, -1)``.
        """
        n_batch = x.size(0)

        # Each line below is one resolution stage of the network.
        x = self.pool1(self.conv2(self.conv1(x)))
        x = self.pool2(self.conv4(self.conv3(x)))
        x = self.pool3(self.conv7(self.conv6(self.conv5(x))))
        x = self.conv10(self.conv9(self.conv8(x)))
        x = self.conv13(self.conv12(self.conv11(self.ups1(x))))
        x = self.conv15(self.conv14(self.ups2(x)))
        x = self.conv18(self.conv17(self.conv16(self.ups3(x))))

        flat = x.reshape(n_batch, self.out_channels, -1)
        return self.softmax(flat) if testing else flat

    def _init_weights(self):
        """Reset conv and batch-norm parameters to fixed starting values.

        Conv weights are drawn uniformly from [-0.05, 0.05] and conv biases
        are zeroed; batch-norm scale is set to 1 and shift to 0.
        """
        for layer in self.modules():
            if isinstance(layer, nn.Conv2d):
                nn.init.uniform_(layer.weight, -0.05, 0.05)
                if layer.bias is not None:
                    nn.init.zeros_(layer.bias)
                continue
            if isinstance(layer, nn.BatchNorm2d):
                nn.init.ones_(layer.weight)
                nn.init.zeros_(layer.bias)
Here is the metadata of my converted model as well:
[ { "metadataOutputVersion" : "3.0", "storagePrecision" : "Float16", "outputSchema" : [ { "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float32", "formattedType" : "MultiArray (Float32 1 × 256 × 230400)", "shortDescription" : "", "shape" : "[1, 256, 230400]", "name" : "var_462", "type" : "MultiArray" } ], "modelParameters" : [ ], "specificationVersion" : 6, "mlProgramOperationTypeHistogram" : { "Cast" : 2, "Conv" : 18, "Relu" : 18, "BatchNorm" : 18, "Reshape" : 1, "UpsampleNearestNeighbor" : 3, "MaxPool" : 3 }, "computePrecision" : "Mixed (Float16, Float32, Int32)", "isUpdatable" : "0", "availability" : { "macOS" : "12.0", "tvOS" : "15.0", "visionOS" : "1.0", "watchOS" : "8.0", "iOS" : "15.0", "macCatalyst" : "15.0" }, "modelType" : { "name" : "MLModelType_mlProgram" }, "userDefinedMetadata" : { "com.github.apple.coremltools.source_dialect" : "TorchScript", "com.github.apple.coremltools.source" : "torch==2.5.1", "com.github.apple.coremltools.version" : "8.1" }, "inputSchema" : [ { "hasShapeFlexibility" : "0", "isOptional" : "0", "dataType" : "Float32", "formattedType" : "MultiArray (Float32 1 × 9 × 360 × 640)", "shortDescription" : "", "shape" : "[1, 9, 360, 640]", "name" : "input_frames", "type" : "MultiArray" } ], "generatedClassName" : "BallTracker", "method" : "predict" } ]
I have been struggling with this conversion for almost 2 weeks now so any help, ideas or pointers would be greatly appreciated! Let me know if any other information would be helpful to see as well.
Thanks!
Michael
@michaeldegoat I think it's a duplicate of https://developer.apple.com/forums/thread/772749?