// MPSCNNHelloWorld/MNISTDeepCNN.swift

/*

    Copyright (C) 2016 Apple Inc. All Rights Reserved.

    See LICENSE.txt for this sample’s licensing information

    Abstract:

    This is the deep layer network where we define and encode the correct layers on a command buffer as needed

*/

import MetalPerformanceShaders

/**

    This class holds the entire deep network, with all the layers needed to produce the final label

    Resources:

    * [Instructions](https://www.tensorflow.org/versions/r0.8/tutorials/mnist/pros/index.html#deep-mnist-for-experts) to run this network on TensorFlow.

*/

class MNIST_Deep_ConvNN: MNIST_Full_LayerNN {

    // MPSImageDescriptors describing the output of each layer (all float16):
    //   conv1 -> 28x28x32, pool -> 14x14x32, conv2 -> 14x14x64, pool -> 7x7x64, fc1 -> 1x1x1024
    let c1id  = MPSImageDescriptor(channelFormat: .float16, width: 28, height: 28, featureChannels: 32)
    let p1id  = MPSImageDescriptor(channelFormat: .float16, width: 14, height: 14, featureChannels: 32)
    let c2id  = MPSImageDescriptor(channelFormat: .float16, width: 14, height: 14, featureChannels: 64)
    let p2id  = MPSImageDescriptor(channelFormat: .float16, width: 7, height: 7, featureChannels: 64)
    let fc1id = MPSImageDescriptor(channelFormat: .float16, width: 1, height: 1, featureChannels: 1024)

    // Intermediate images; allocated once in init and reused as destinations by every forward() call
    var c1Image, c2Image, p1Image, p2Image, fc1Image: MPSImage

    // Layers of the network
    var conv1, conv2: MPSCNNConvolution
    var fc1, fc2: MPSCNNFullyConnected
    // A single max-pooling kernel is shared by both pooling stages
    var pool: MPSCNNPoolingMax
    // ReLU activation handed to both convolution layers as their neuron filter
    var relu: MPSCNNNeuronReLU

    /**
        Builds every layer and intermediate image of the network on the device that owns the given command queue

        - Parameters:
            - commandQueueIn: Command queue whose device is used to create the kernels and images; it is also forwarded to the superclass initializer
    */
    override init(withCommandQueue commandQueueIn: MTLCommandQueue!) {
        // use device for a little while to initialize
        let device = commandQueueIn.device

        // 2x2 max pooling with a stride of 2 halves the width and height of its input
        pool = MPSCNNPoolingMax(device: device, kernelWidth: 2, kernelHeight: 2, strideInPixelsX: 2, strideInPixelsY: 2)
        // shift the pooling window by one pixel in x and y; reads outside the source use the clamp edge mode
        pool.offset = MPSOffset(x: 1, y: 1, z: 0)
        pool.edgeMode = MPSImageEdgeMode.clamp

        relu = MPSCNNNeuronReLU(device: device, a: 0)

        // Initialize MPSImage from descriptors
        c1Image  = MPSImage(device: device, imageDescriptor: c1id)
        p1Image  = MPSImage(device: device, imageDescriptor: p1id)
        c2Image  = MPSImage(device: device, imageDescriptor: c2id)
        p2Image  = MPSImage(device: device, imageDescriptor: p2id)
        fc1Image = MPSImage(device: device, imageDescriptor: fc1id)

        // setup convolution layers: 5x5 kernels, 1 -> 32 and 32 -> 64 feature channels, each followed by ReLU
        conv1 = SlimMPSCNNConvolution(kernelWidth: 5,
                                      kernelHeight: 5,
                                      inputFeatureChannels: 1,
                                      outputFeatureChannels: 32,
                                      neuronFilter: relu,
                                      device: device,
                                      kernelParamsBinaryName: "conv1")

        conv2 = SlimMPSCNNConvolution(kernelWidth: 5,
                                      kernelHeight: 5,
                                      inputFeatureChannels: 32,
                                      outputFeatureChannels: 64,
                                      neuronFilter: relu,
                                      device: device,
                                      kernelParamsBinaryName: "conv2")

        // fully connected layer over the whole 7x7x64 pooled image, producing 1x1x1024
        // NOTE(review): no neuron filter is applied after fc1, whereas the referenced TensorFlow
        // tutorial uses ReLU on this layer — confirm this matches how the "fc1" weights were trained
        fc1 = SlimMPSCNNFullyConnected(kernelWidth: 7,
                                       kernelHeight: 7,
                                       inputFeatureChannels: 64,
                                       outputFeatureChannels: 1024,
                                       neuronFilter: nil,
                                       device: device,
                                       kernelParamsBinaryName: "fc1")

        // same as a 1x1 convolution filter to produce 1x1x10 from 1x1x1024
        fc2 = SlimMPSCNNFullyConnected(kernelWidth: 1,
                                       kernelHeight: 1,
                                       inputFeatureChannels: 1024,
                                       outputFeatureChannels: 10,
                                       neuronFilter: nil,
                                       device: device,
                                       kernelParamsBinaryName: "fc2")

        super.init(withCommandQueue: commandQueueIn)
    }

    /**
        This function encodes all the layers of the network into a fresh command buffer, commits it, and optionally waits for the GPU to finish

        The call only blocks until the GPU is done when `imageNum` is 9999 (the default value, which is also the last index of the test set).
        For any other `imageNum` the function returns right after committing, so the returned value is the placeholder 99 and the result is
        only reported through the completion handler, which calls `__atomic_increment()` when the guess matches `correctLabel`.

        `commandQueue`, `srcImage`, `dstImage`, `did`, `softmax`, `getLabel(finalLayer:)` and `__atomic_increment()` are declared outside this class.

        - Parameters:
            - inputImage: Image coming in on which the network will run; when nil, `srcImage` is used instead
            - imageNum: If the test set is being used we will get a value between 0 and 9999 for which of the 10,000 images is being evaluated
            - correctLabel: The correct label for the inputImage while testing

        - Returns:
            Guess of the network as to what the digit is as UInt when the call waited for completion, otherwise 99
    */
    override func forward(inputImage: MPSImage? = nil, imageNum: Int = 9999, correctLabel: UInt = 10) -> UInt {
        // placeholder returned when we do not wait for the GPU
        var label = UInt(99)

        // to deliver optimal performance we leave some resources used in MPSCNN to be released at next call of autoreleasepool,
        // so the user can decide the appropriate time to release this
        autoreleasepool {
            // Get command buffer to use in MetalPerformanceShaders.
            let commandBuffer = commandQueue.makeCommandBuffer()

            // output will be stored in this image
            let finalLayer = MPSImage(device: commandBuffer.device, imageDescriptor: did)

            // fall back to the stored source image when the caller did not supply one
            let source: MPSImage = inputImage ?? srcImage

            // encode layers to metal commandBuffer
            conv1.encode(commandBuffer: commandBuffer, sourceImage: source, destinationImage: c1Image)
            pool.encode(commandBuffer: commandBuffer, sourceImage: c1Image, destinationImage: p1Image)
            conv2.encode(commandBuffer: commandBuffer, sourceImage: p1Image, destinationImage: c2Image)
            pool.encode(commandBuffer: commandBuffer, sourceImage: c2Image, destinationImage: p2Image)
            fc1.encode(commandBuffer: commandBuffer, sourceImage: p2Image, destinationImage: fc1Image)
            fc2.encode(commandBuffer: commandBuffer, sourceImage: fc1Image, destinationImage: dstImage)
            softmax.encode(commandBuffer: commandBuffer, sourceImage: dstImage, destinationImage: finalLayer)

            // add a completion handler to get the correct label the moment GPU is done and compare it to the correct output or return it
            commandBuffer.addCompletedHandler { _ in
                label = self.getLabel(finalLayer: finalLayer)
                if correctLabel == label {
                    __atomic_increment()
                }
            }

            // commit commandbuffer to run on GPU and wait for completion
            commandBuffer.commit()
            if imageNum == 9999 {
                commandBuffer.waitUntilCompleted()
            }
        }

        return label
    }
}

Next Previous