Using the iPhone 14 Max camera, run model-based object detection and draw a rectangular box around each detected object. The width and height are computed with the LiDAR sensor and displayed in centimeters on the live, continuously updated image.
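The measurement step itself is the pinhole-camera relation: given a LiDAR depth sample depth (in meters) at the detection and the box's size in pixels of the captured image, the real-world width is roughly depth × pixelWidth / fx, where fx is the focal length taken from frame.camera.intrinsics. A minimal sketch of that relation (the helper name and the assumption that the box is expressed in captured-image pixels are mine):

import ARKit

// Rough metric size of a detection from a single depth sample, assuming a pinhole camera
// and a roughly fronto-parallel object. `box` is assumed to be in the captured image's
// pixel coordinates, `depth` in meters.
func approximateSize(box: CGRect, depth: Float, camera: ARCamera) -> (widthCM: Float, heightCM: Float) {
    let fx = camera.intrinsics[0][0]   // focal length in pixels along x
    let fy = camera.intrinsics[1][1]   // focal length in pixels along y
    let width  = depth * Float(box.width)  / fx * 100   // meters -> centimeters
    let height = depth * Float(box.height) / fy * 100
    return (width, height)
}

The code below attempts the more general version of this: it unprojects each corner of the detection with its own depth sample and measures the 3D distance between the corners.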
import UIKit
import AVFoundation
import CoreML
import Vision
import ARKit
import Photos
import CoreMotion

class DoorsByYolov8mlcore: UIViewController, ARSCNViewDelegate, AVCaptureVideoDataOutputSampleBufferDelegate {

    // MARK: - Properties
    var sceneView: ARSCNView!
    var captureSession: AVCaptureSession!
    var videoPreviewLayer: AVCaptureVideoPreviewLayer!
    var model: VNCoreMLModel?
    let iphoneStatus = GetIPhoneStatusClass()

    // MARK: - Lifecycle Methods
    override func viewDidLoad() {
        super.viewDidLoad()
        checkPermissions()
        loadModel()
        if model == nil {
            print("WARNING: Model failed to load")
        }
        setupCamera()

        sceneView = ARSCNView(frame: self.view.frame)
        self.view.addSubview(sceneView)

        let configuration = ARWorldTrackingConfiguration()
        configuration.sceneReconstruction = .meshWithClassification
        // Scene depth from the LiDAR sensor has to be requested explicitly,
        // otherwise frame.sceneDepth stays nil.
        if ARWorldTrackingConfiguration.supportsFrameSemantics(.sceneDepth) {
            configuration.frameSemantics.insert(.sceneDepth)
        }
        sceneView.session.run(configuration)
        sceneView.delegate = self
        print("SceneView initialized: \(sceneView != nil)")
    }

    override func viewWillDisappear(_ animated: Bool) {
        super.viewWillDisappear(animated)
        captureSession?.stopRunning()
        sceneView.session.pause()
    }

    deinit {
        NotificationCenter.default.removeObserver(self)
    }

    // MARK: - Setup Methods
    private func checkPermissions() {
        switch AVCaptureDevice.authorizationStatus(for: .video) {
        case .authorized:
            break
        case .notDetermined:
            AVCaptureDevice.requestAccess(for: .video) { granted in
                if granted {
                    DispatchQueue.main.async {
                        self.setupCamera()
                    }
                }
            }
        default:
            showPermissionAlert()
        }
    }

    private func showPermissionAlert() {
        let alert = UIAlertController(
            title: "Camera Access Required",
            message: "This app needs access to your camera to detect doors. Please allow camera access in Settings.",
            preferredStyle: .alert
        )
        alert.addAction(UIAlertAction(title: "Cancel", style: .cancel))
        alert.addAction(UIAlertAction(title: "Open Settings", style: .default, handler: { _ in
            if let settingsURL = URL(string: UIApplication.openSettingsURLString) {
                UIApplication.shared.open(settingsURL)
            }
        }))
        DispatchQueue.main.async { [weak self] in
            self?.present(alert, animated: true)
        }
    }

    func setupCamera() {
        captureSession = AVCaptureSession()
        if captureSession.canSetSessionPreset(.hd1280x720) {
            captureSession.sessionPreset = .hd1280x720
        }

        guard let videoCaptureDevice = AVCaptureDevice.default(for: .video) else {
            print("No video device found")
            return
        }

        let videoDeviceInput: AVCaptureDeviceInput
        do {
            videoDeviceInput = try AVCaptureDeviceInput(device: videoCaptureDevice)
        } catch {
            print("Error accessing camera: \(error)")
            return
        }

        if captureSession.canAddInput(videoDeviceInput) {
            captureSession.addInput(videoDeviceInput)
        } else {
            print("Could not add video input")
            return
        }

        videoPreviewLayer = AVCaptureVideoPreviewLayer(session: captureSession)
        videoPreviewLayer.frame = view.bounds
        videoPreviewLayer.videoGravity = .resizeAspectFill
        view.layer.addSublayer(videoPreviewLayer)

        let videoDataOutput = AVCaptureVideoDataOutput()
        videoDataOutput.videoSettings = [
            kCVPixelBufferPixelFormatTypeKey as String: Int(kCVPixelFormatType_32BGRA)
        ]
        if captureSession.canAddOutput(videoDataOutput) {
            captureSession.addOutput(videoDataOutput)
            videoDataOutput.setSampleBufferDelegate(self, queue: DispatchQueue(label: "videoQueue"))
        } else {
            print("Could not add video output")
            return
        }

        DispatchQueue.global(qos: .background).async {
            self.captureSession.startRunning()
        }
    }

    func loadModel() {
        guard let modelURL = Bundle.main.url(forResource: "doorsjj", withExtension: "mlmodelc") else {
            print("Error: doorsjj.mlmodelc not found in bundle")
            return
        }
        do {
            let coremlModel = try MLModel(contentsOf: modelURL)
            self.model = try VNCoreMLModel(for: coremlModel)
            print("Model loaded successfully")
        } catch {
            print("Error loading CoreML model: \(error.localizedDescription)")
        }
    }
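    // Sample-buffer callback for AVCaptureVideoDataOutputSampleBufferDelegate.
    // A minimal sketch, assuming each camera frame should simply be forwarded to processVideoFrame.
    func captureOutput(_ output: AVCaptureOutput,
                       didOutput sampleBuffer: CMSampleBuffer,
                       from connection: AVCaptureConnection) {
        guard let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) else { return }
        processVideoFrame(pixelBuffer: pixelBuffer)
    }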
    func processVideoFrame(pixelBuffer: CVPixelBuffer) {
        let ciImage = CIImage(cvPixelBuffer: pixelBuffer)

        // Scale the frame down toward the model's input size, preserving aspect ratio, then crop.
        let targetSize = CGSize(width: 736, height: 1280)
        let extent = ciImage.extent
        let scaleX = targetSize.width / extent.width
        let scaleY = targetSize.height / extent.height
        let scale = min(scaleX, scaleY)
        let scaledImage = ciImage.transformed(by: CGAffineTransform(scaleX: scale, y: scale))
        let croppedImage = scaledImage.cropped(to: CGRect(x: 0, y: 0, width: 720, height: 1280))

        // Rotate the image to match the current device orientation.
        let orientation = iphoneStatus.getCurrentDeviceOrientation()
        let rotatedImage: CIImage
        switch orientation {
        case .portrait:
            rotatedImage = croppedImage
        case .landscapeLeft:
            rotatedImage = croppedImage.transformed(by: CGAffineTransform(rotationAngle: -.pi / 2))
        case .landscapeRight:
            rotatedImage = croppedImage.transformed(by: CGAffineTransform(rotationAngle: .pi / 2))
        case .portraitUpsideDown:
            rotatedImage = croppedImage.transformed(by: CGAffineTransform(rotationAngle: .pi))
        default:
            rotatedImage = croppedImage.transformed(by: CGAffineTransform(rotationAngle: .pi / 2))
        }

        recognizeImage(image: rotatedImage)
    }

    func recognizeImage(image: CIImage) {
        guard let model = self.model else {
            print("Model not loaded")
            return
        }

        let request = VNCoreMLRequest(model: model) { [weak self] request, error in
            guard let self = self else { return }
            if let error = error {
                print("Detection error: \(error)")
                return
            }
            guard let results = request.results as? [VNRecognizedObjectObservation] else {
                print("No results or invalid type")
                return
            }
            if results.isEmpty {
                print("No objects detected in this frame")
            } else {
                // Keep only the observations labelled "door".
                var doorCount = 0
                var doorObservations: [VNRecognizedObjectObservation] = []
                for observation in results {
                    if let label = observation.labels.first?.identifier, label == "door" {
                        doorCount += 1
                        doorObservations.append(observation)
                    }
                }
                DispatchQueue.main.async {
                    self.drawDetections(doorObservations)
                }
            }
        }
        request.imageCropAndScaleOption = .scaleFit

        do {
            let handler = VNImageRequestHandler(ciImage: image, orientation: .up, options: [:])
            try handler.perform([request])
        } catch {
            print("Failed to perform detection: \(error)")
        }
    }

    func drawDetections(_ observations: [VNRecognizedObjectObservation]) {
        // Remove the boxes and measurement labels drawn for the previous frame.
        view.layer.sublayers?.removeAll(where: { $0.name == "detectionLayer" })
        view.subviews.compactMap { $0 as? UILabel }.forEach { $0.removeFromSuperview() }

        guard let frame = sceneView.session.currentFrame else { return }

        for observation in observations {
            let boundingBox = observation.boundingBox
            let viewBounds = view.bounds
            // Vision rects are normalized with a bottom-left origin; convert to view
            // coordinates and flip to UIKit's top-left origin.
            var normalizedRect = VNImageRectForNormalizedRect(
                boundingBox,
                Int(viewBounds.width),
                Int(viewBounds.height)
            )
            normalizedRect.origin.y = viewBounds.height - normalizedRect.maxY

            let boxLayer = CALayer()
            boxLayer.name = "detectionLayer"
            boxLayer.frame = normalizedRect
            boxLayer.borderColor = UIColor.green.cgColor
            boxLayer.borderWidth = 2
            boxLayer.cornerRadius = 4
            view.layer.addSublayer(boxLayer)

            let dimensions = calculateDimensions(at: boundingBox, frame: frame)
            let width = dimensions.width
            let height = dimensions.height

            let label = UILabel(frame: CGRect(x: normalizedRect.minX, y: normalizedRect.minY - 20, width: 160, height: 20))
            label.text = String(format: "W: %.2fcm, H: %.2fcm", width, height)
            label.textColor = .white
            label.backgroundColor = .black.withAlphaComponent(0.5)
            label.font = UIFont.systemFont(ofSize: 12)
            view.addSubview(label)
        }
    }
    func calculateDimensions(at boundingBox: CGRect, frame: ARFrame) -> (width: Float, height: Float) {
        if let depthData = frame.sceneDepth {
            let depthMap = depthData.depthMap

            // The four corners of the detection, still in Vision's normalized coordinates.
            let topLeft = CGPoint(x: boundingBox.minX, y: boundingBox.minY)
            let topRight = CGPoint(x: boundingBox.maxX, y: boundingBox.minY)
            let bottomLeft = CGPoint(x: boundingBox.minX, y: boundingBox.maxY)
            let bottomRight = CGPoint(x: boundingBox.maxX, y: boundingBox.maxY)

            let topLeftPosition = unprojectPoint(topLeft, depthMap: depthMap, frame: frame)
            let topRightPosition = unprojectPoint(topRight, depthMap: depthMap, frame: frame)
            let bottomLeftPosition = unprojectPoint(bottomLeft, depthMap: depthMap, frame: frame)
            let bottomRightPosition = unprojectPoint(bottomRight, depthMap: depthMap, frame: frame)

            let width = distanceBetweenPoints(topLeftPosition, topRightPosition) * 100    // meters -> cm
            let height = distanceBetweenPoints(topLeftPosition, bottomLeftPosition) * 100 // meters -> cm
            return (width, height)
        }
        return (0, 0)
    }

    func unprojectPoint(_ point: CGPoint, depthMap: CVPixelBuffer, frame: ARFrame) -> simd_float4 {
        // Map the normalized coordinate into depth-map pixel coordinates.
        let pixel = CGPoint(x: point.x * CGFloat(CVPixelBufferGetWidth(depthMap)),
                            y: point.y * CGFloat(CVPixelBufferGetHeight(depthMap)))
        // Read the depth value at that pixel.
        let depth = getDepthValue(at: pixel, from: depthMap)
        // Convert the pixel position together with the depth value into world coordinates.
        let worldPosition = frame.camera.transform * simd_float4(Float(pixel.x), Float(pixel.y), Float(depth), 1)
        return worldPosition
    }

    func getDepthValue(at point: CGPoint, from depthMap: CVPixelBuffer) -> Float {
        // Depth-map dimensions.
        let width = CVPixelBufferGetWidth(depthMap)
        let height = CVPixelBufferGetHeight(depthMap)

        // Make sure the point lies inside the depth map.
        guard point.x >= 0, point.x < CGFloat(width), point.y >= 0, point.y < CGFloat(height) else {
            return 0.0
        }

        // Read the Float32 depth value (in meters) at the requested pixel.
        CVPixelBufferLockBaseAddress(depthMap, .readOnly)
        defer { CVPixelBufferUnlockBaseAddress(depthMap, .readOnly) }
        guard let baseAddress = CVPixelBufferGetBaseAddress(depthMap) else { return 0.0 }
        let bytesPerRow = CVPixelBufferGetBytesPerRow(depthMap)
        let pixelAddress = baseAddress + Int(point.y) * bytesPerRow + Int(point.x) * MemoryLayout<Float32>.size
        return pixelAddress.assumingMemoryBound(to: Float32.self).pointee
    }

    func distanceBetweenPoints(_ p1: simd_float4, _ p2: simd_float4) -> Float {
        let dx = p1.x - p2.x
        let dy = p1.y - p2.y
        let dz = p1.z - p2.z
        return sqrt(dx * dx + dy * dy + dz * dz)
    }
}
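For the per-corner measurement, the depth sample should be back-projected through the camera intrinsics rather than combined with the camera transform directly. A sketch of that step, assuming the point is already in depth-map pixel coordinates and the depth map is kCVPixelFormatType_DepthFloat32 (the helper name and signature are my own):

import ARKit

func worldPoint(atDepthMapPixel pixel: CGPoint, depthInMeters depth: Float,
                depthMapSize: CGSize, frame: ARFrame) -> simd_float3 {
    // The intrinsics are expressed for the full-resolution captured image, so rescale the
    // pixel coordinate from depth-map resolution up to capture resolution first.
    let capture = frame.camera.imageResolution
    let px = Float(pixel.x) * Float(capture.width / depthMapSize.width)
    let py = Float(pixel.y) * Float(capture.height / depthMapSize.height)

    let K = frame.camera.intrinsics          // pinhole intrinsics: fx, fy, cx, cy in pixels
    let fx = K[0][0], fy = K[1][1]
    let cx = K[2][0], cy = K[2][1]

    // ARKit camera space: +x right, +y up, the camera looks down -z.
    let local = simd_float4((px - cx) * depth / fx,
                            -(py - cy) * depth / fy,
                            -depth,
                            1)
    let world = frame.camera.transform * local
    return simd_float3(world.x, world.y, world.z)
}

Measuring the distance between two such corner points (for example with simd_distance) gives the width and height in meters; multiplying by 100 converts to centimeters.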