I specifically give the model an input shape of (1, 3, 1024, 1024), but for some reason Core ML reports 2 channels instead of 3. The PyTorch model is based on this - LINK
The "local.pth" model, to be specific. My Core ML conversion code is attached below.
#from networks.drn_seg import DRNSeg
import coremltools as ct
import coremltools.proto.FeatureTypes_pb2 as ft
import io
from PIL import Image
from torchvision import transforms
import math
import torch
import torch.nn as nn
from networks.drn import drn_c_26
import torchvision
from torchvision.io import read_image
def fill_up_weights(up):
    """Initialize `up.weight` in place with a fixed bilinear upsampling
    kernel, replicated from output channel 0 into every other channel.

    Intended for a depthwise ConvTranspose2d used as a non-learned
    bilinear upsampler.
    """
    weight = up.weight.data
    factor = math.ceil(weight.size(2) / 2)
    center = (2 * factor - 1 - factor % 2) / (2. * factor)
    # Fill channel 0 with the separable bilinear interpolation kernel.
    for row in range(weight.size(2)):
        for col in range(weight.size(3)):
            value = ((1 - math.fabs(row / factor - center)) *
                     (1 - math.fabs(col / factor - center)))
            weight[0, 0, row, col] = value
    # Replicate channel 0's kernel across the remaining output channels.
    for channel in range(1, weight.size(0)):
        weight[channel, 0, :, :] = weight[0, 0, :, :]
class DRNSeg(nn.Module):
    """Segmentation network: a DRN-C-26 feature trunk, a 1x1 conv head
    producing `classes` score maps, and a fixed 8x bilinear upsampler.

    NOTE: the forward output has `classes` channels (default 2), NOT 3 —
    it is a per-class score map, not an RGB image.
    """

    def __init__(self, classes=2, pretrained_drn=None,
                 pretrained_model=None, use_torch_up=False):
        """Build the model.

        Args:
            classes: number of output channels / score maps. Defaults to 2,
                matching the original hard-coded configuration.
            pretrained_drn: forwarded to ``drn_c_26(pretrained=...)``.
            pretrained_model: optional checkpoint path; when set, the
                trunk weights are loaded via :meth:`load_pretrained`.
            use_torch_up: if True, use ``nn.UpsamplingBilinear2d``;
                otherwise use a frozen depthwise ConvTranspose2d
                initialized to bilinear weights.
        """
        super(DRNSeg, self).__init__()
        model = drn_c_26(pretrained=pretrained_drn)
        # Drop the last two children (classifier layers); keep the trunk.
        self.base = nn.Sequential(*list(model.children())[:-2])
        # Load trunk weights before creating the head so the head's fresh
        # initialization below is never overwritten.
        if pretrained_model:
            self.load_pretrained(pretrained_model)

        # 1x1 conv head: backbone features -> per-class score maps.
        self.seg = nn.Conv2d(model.out_dim, classes,
                             kernel_size=1, bias=True)
        m = self.seg
        # He-style initialization based on fan-out of the head.
        n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
        m.weight.data.normal_(0, math.sqrt(2. / n))
        m.bias.data.zero_()

        if use_torch_up:
            self.up = nn.UpsamplingBilinear2d(scale_factor=8)
        else:
            # Depthwise transposed conv acting as a fixed 8x bilinear
            # upsampler; weights are filled once and frozen.
            up = nn.ConvTranspose2d(classes, classes, 16, stride=8,
                                    padding=4, output_padding=0,
                                    groups=classes, bias=False)
            fill_up_weights(up)
            up.weight.requires_grad = False
            self.up = up

    def forward(self, x):
        """Return 8x-upsampled score maps with `classes` channels."""
        x = self.base(x)
        x = self.seg(x)
        y = self.up(x)
        return y

    def optim_parameters(self, memo=None):
        """Yield trainable parameters (trunk + head; upsampler is frozen)."""
        for param in self.base.parameters():
            yield param
        for param in self.seg.parameters():
            yield param

    def load_pretrained(self, pretrained_model):
        """Load trunk weights from a checkpoint's ``'model'`` entry.

        Keeps only keys prefixed ``base.`` and strips that prefix before
        loading into ``self.base``.
        """
        print("loading the pretrained drn model from %s" % pretrained_model)
        state_dict = torch.load(pretrained_model, map_location='cpu')
        if hasattr(state_dict, '_metadata'):
            del state_dict._metadata
        # filter out unnecessary keys
        pretrained_dict = state_dict['model']
        pretrained_dict = {k[5:]: v for k, v in pretrained_dict.items()
                           if k.split('.')[0] == 'base'}
        # load the pretrained state dict
        self.base.load_state_dict(pretrained_dict)
#img = read_image('test.jpg')
model_path = "modelphotoshop.pth"
device = 'cpu'

# Build the model and load the fine-tuned weights.
model_flow = DRNSeg()
model_flow.load_state_dict(torch.load(model_path, map_location=device))
model_flow.to(device)
model_flow.eval()

# Trace with the exact input shape that will be declared for conversion.
example_input = torch.randn([1, 3, 1024, 1024])
traced_model = torch.jit.trace(model_flow, example_input)
traced_model.save("modelphotoshop.pt")

shape = (1, 3, 1024, 1024)
# BUG FIX: DRNSeg outputs `classes` = 2 channels, not 3, so declaring the
# output as an RGB ImageType makes coremltools report a 2-channel image.
# Expose the output as a plain multi-array (TensorType) instead; the input
# really is a 3-channel RGB image, so its ImageType declaration is kept.
mlmodel = ct.convert(
    traced_model,
    convert_to="mlprogram",
    inputs=[ct.ImageType(name="colorImage",
                         shape=shape,
                         color_layout=ct.colorlayout.RGB)],
    outputs=[ct.TensorType(name="colorOutput")],
)
# BUG FIX: a model converted with convert_to="mlprogram" must be saved as an
# .mlpackage bundle; coremltools raises an error when saving it to .mlmodel.
mlmodel.save("Model.mlpackage")