import UIKit
import AVFoundation
import Vision

/// Receives raw sample buffers from the live camera feed together with the
/// orientation they should be interpreted in.
protocol CameraViewControllerOutputDelegate: AnyObject {
    func cameraViewController(_ controller: CameraViewController, didReceiveBuffer buffer: CMSampleBuffer, orientation: CGImagePropertyOrientation)
}

class CameraViewController: UIViewController {

    weak var outputDelegate: CameraViewControllerOutputDelegate?

    /// Serial queue on which the capture output delivers sample buffers.
    private let videoDataOutputQueue = DispatchQueue(label: "CameraFeedDataOutput",
                                                    qos: .userInitiated,
                                                    attributes: [],
                                                    autoreleaseFrequency: .workItem)

    // Live camera feed management
    private var cameraFeedView: CameraFeedView!

    /// Transparent overlay on top of the camera preview used to draw
    /// detected body-pose points.
    private var drawingView: UIView = {
        let view = UIView(frame: UIScreen.main.bounds)
        view.backgroundColor = .clear
        return view
    }()

    private var cameraFeedSession: AVCaptureSession?

    override func viewDidLoad() {
        super.viewDidLoad()
        do {
            try setupAVSession()
        } catch {
            // Surface the underlying error instead of a bare failure message.
            print("setup av session failed: \(error)")
        }
    }

    override func viewDidDisappear(_ animated: Bool) {
        super.viewDidDisappear(animated)
        // Stop capture session if it's running
        cameraFeedSession?.stopRunning()
    }

    /// Configures the capture session: front wide-angle camera input,
    /// 1080p preset (falling back to `.high`), and a video data output
    /// delivering frames to `videoDataOutputQueue`. Also creates the
    /// preview view and starts the session.
    /// - Throws: `AppError.captureSessionSetup` when a device, input, or
    ///   output cannot be created or attached.
    func setupAVSession() throws {
        // Create device discovery session for a wide angle camera
        let wideAngle = AVCaptureDevice.DeviceType.builtInWideAngleCamera
        let discoverySession = AVCaptureDevice.DiscoverySession(deviceTypes: [wideAngle], mediaType: .video, position: .front)

        // Select a video device, make an input
        guard let videoDevice = discoverySession.devices.first else {
            throw AppError.captureSessionSetup(reason: "Could not find a wide angle camera device.")
        }
        guard let deviceInput = try? AVCaptureDeviceInput(device: videoDevice) else {
            throw AppError.captureSessionSetup(reason: "Could not create video device input.")
        }

        let session = AVCaptureSession()
        session.beginConfiguration()
        // We prefer a 1080p video capture but if camera cannot provide it then fall back to highest possible quality
        session.sessionPreset = videoDevice.supportsSessionPreset(.hd1920x1080) ? .hd1920x1080 : .high

        // Add a video input
        guard session.canAddInput(deviceInput) else {
            throw AppError.captureSessionSetup(reason: "Could not add video device input to the session")
        }
        session.addInput(deviceInput)

        // Add a video data output
        let dataOutput = AVCaptureVideoDataOutput()
        guard session.canAddOutput(dataOutput) else {
            throw AppError.captureSessionSetup(reason: "Could not add video data output to the session")
        }
        session.addOutput(dataOutput)
        dataOutput.alwaysDiscardsLateVideoFrames = true
        dataOutput.videoSettings = [
            String(kCVPixelBufferPixelFormatTypeKey): Int(kCVPixelFormatType_420YpCbCr8BiPlanarFullRange)
        ]
        dataOutput.setSampleBufferDelegate(self, queue: videoDataOutputQueue)

        let captureConnection = dataOutput.connection(with: .video)
        captureConnection?.preferredVideoStabilizationMode = .standard
        // Always process the frames
        captureConnection?.isEnabled = true
        session.commitConfiguration()
        cameraFeedSession = session

        // Get the interface orientation from window scene to set proper video orientation on capture connection.
        // NOTE(review): at viewDidLoad time `view.window` is typically still nil,
        // so this always falls back to `.portrait` — confirm this is intended.
        let videoOrientation: AVCaptureVideoOrientation
        switch view.window?.windowScene?.interfaceOrientation {
        case .landscapeRight:
            videoOrientation = .landscapeRight
        default:
            videoOrientation = .portrait
        }

        // Create and setup video feed view
        cameraFeedView = CameraFeedView(frame: view.bounds, session: session, videoOrientation: videoOrientation)
        setupVideoOutputView(cameraFeedView)
        // NOTE(review): startRunning() blocks; Apple recommends calling it off
        // the main thread — confirm whether this stall is acceptable here.
        cameraFeedSession?.startRunning()
    }

    // This helper function is used to convert rects returned by Vision to the video content rect coordinates.
// // The video content rect (camera preview or pre-recorded video) // is scaled to fit into the view controller's view frame preserving the video's aspect ratio // and centered vertically and horizontally inside the view. // // Vision coordinates have origin at the bottom left corner and are normalized from 0 to 1 for both dimensions. // func viewRectForVisionRect(_ visionRect: CGRect) -> CGRect { let flippedRect = visionRect.applying(CGAffineTransform.verticalFlip) let viewRect: CGRect if cameraFeedSession != nil { return cameraFeedView.viewRectConverted(fromNormalizedContentsRect: flippedRect) } return .zero } // This helper function is used to convert points returned by Vision to the video content rect coordinates. // // The video content rect (camera preview or pre-recorded video) // is scaled to fit into the view controller's view frame preserving the video's aspect ratio // and centered vertically and horizontally inside the view. // // Vision coordinates have origin at the bottom left corner and are normalized from 0 to 1 for both dimensions. 
// func viewPointForVisionPoint(_ visionPoint: CGPoint) -> CGPoint { let flippedPoint = visionPoint.applying(CGAffineTransform.verticalFlip) let viewPoint: CGPoint if cameraFeedSession != nil { return cameraFeedView.viewPointConverted(fromNormalizedContentsPoint: flippedPoint) } return .zero } func setupVideoOutputView(_ videoOutputView: UIView) { videoOutputView.translatesAutoresizingMaskIntoConstraints = false videoOutputView.backgroundColor = #colorLiteral(red: 0, green: 0, blue: 0, alpha: 1) view.addSubview(videoOutputView) view.addSubview(drawingView) NSLayoutConstraint.activate([ videoOutputView.leftAnchor.constraint(equalTo: view.leftAnchor), videoOutputView.rightAnchor.constraint(equalTo: view.rightAnchor), videoOutputView.topAnchor.constraint(equalTo: view.topAnchor), videoOutputView.bottomAnchor.constraint(equalTo: view.bottomAnchor), drawingView.leftAnchor.constraint(equalTo: view.leftAnchor), drawingView.rightAnchor.constraint(equalTo: view.rightAnchor), drawingView.topAnchor.constraint(equalTo: view.topAnchor), drawingView.bottomAnchor.constraint(equalTo: view.bottomAnchor) ]) } } extension CameraViewController: AVCaptureVideoDataOutputSampleBufferDelegate { func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) { let requestHandler = VNImageRequestHandler(cmSampleBuffer: sampleBuffer, orientation: .left) let request = VNDetectHumanBodyPoseRequest(completionHandler: bodyPoseHandler) do { // Perform the body pose-detection request. try requestHandler.perform([request]) } catch { print("Unable to perform the request: \(error).") } } func bodyPoseHandler(request: VNRequest, error: Error?) { guard let observations = request.results as? [VNRecognizedPointsObservation] else { return } // Process each observation to find the recognized body pose points. 
observations.forEach { processObservation($0) } } func processObservation(_ observation: VNRecognizedPointsObservation) { // Retrieve all torso points. guard let recTorsoPoints = try? observation.recognizedPoints(forGroupKey: .bodyLandmarkRegionKeyTorso) else { return } guard let recRightArmPoints = try? observation.recognizedPoints(forGroupKey: .bodyLandmarkRegionKeyRightArm) else { return } guard let recLeftArmPoints = try? observation.recognizedPoints(forGroupKey: .bodyLandmarkRegionKeyLeftArm) else { return } guard let recRightLegPoints = try? observation.recognizedPoints(forGroupKey: .bodyLandmarkRegionKeyRightLeg) else { return } guard let recLeftLegPoints = try? observation.recognizedPoints(forGroupKey: .bodyLandmarkRegionKeyLeftLeg) else { return } guard let recFacePoints = try? observation.recognizedPoints(forGroupKey: .bodyLandmarkRegionKeyFace) else { return } // Torso point keys in a clockwise ordering. let torsoKeys: [VNRecognizedPointKey] = [ .bodyLandmarkKeyNeck, .bodyLandmarkKeyNose, .bodyLandmarkKeyLeftEye, .bodyLandmarkKeyLeftEar, .bodyLandmarkKeyRightEye, .bodyLandmarkKeyRightEar, .bodyLandmarkKeyRightShoulder, .bodyLandmarkKeyRightHip, .bodyLandmarkKeyRoot, .bodyLandmarkKeyLeftHip, .bodyLandmarkKeyLeftShoulder, .bodyLandmarkKeyLeftElbow, .bodyLandmarkKeyLeftWrist, .bodyLandmarkKeyRightElbow, .bodyLandmarkKeyRightWrist, .bodyLandmarkKeyLeftKnee, .bodyLandmarkKeyLeftAnkle, .bodyLandmarkKeyRightKnee, .bodyLandmarkKeyRightAnkle ] var recognizedPoints = recFacePoints.merging(recTorsoPoints) { (current, _) in current } recognizedPoints = recFacePoints.merging(recLeftArmPoints) { (current, _) in current } recognizedPoints = recFacePoints.merging(recRightArmPoints) { (current, _) in current } recognizedPoints = recFacePoints.merging(recLeftLegPoints) { (current, _) in current } recognizedPoints = recFacePoints.merging(recRightLegPoints) { (current, _) in current } // Retrieve the CGPoints containing the normalized X and Y coordinates. 
let imagePoints: [CGPoint] = torsoKeys.compactMap { guard let point = recognizedPoints[$0], point.confidence > 0 else { return nil } // Translate the point from normalized-coordinates to image coordinates. return VNImagePointForNormalizedPoint(point.location, Int(UIScreen.main.bounds.width), Int(UIScreen.main.bounds.height)) } // Draw the points onscreen. DispatchQueue.main.async { self.draw(points: imagePoints) } } func draw(points: [CGPoint]) { drawingView.layer.sublayers?.forEach { $0.removeFromSuperlayer() } points.map { point in let visionPoint = point//.applying(CGAffineTransform.init(rotationAngle: 0/2)) print("0: " + point.debugDescription) print("1: " + visionPoint.debugDescription) let circlePath = UIBezierPath(arcCenter: visionPoint, radius: CGFloat(10), startAngle: CGFloat(0), endAngle: CGFloat(Double.pi * 2), clockwise: true) let shapeLayer = CAShapeLayer() shapeLayer.path = circlePath.cgPath shapeLayer.fillColor = UIColor.red.cgColor shapeLayer.strokeColor = UIColor.red.cgColor shapeLayer.lineWidth = 3.0 drawingView.layer.addSublayer(shapeLayer) } } }