AVMutableVideoCompositionLayerInstruction -Misalignment when Merging Videos

I followed the [Ray Wenderlich]tutorial to merge videos. The finished result is 1 merged video where portrait videos are at the top of the screen and landscape videos are at the bottom of the screen. In the image below the portrait videos plays first and then landscape video plays after it. The landscape video is from the Photos Library.

code:

let mixComposition = AVMutableComposition()
let videoCompositionTrack = mixComposition.addMutableTrack(withMediaType: .video, preferredTrackID: Int32(kCMPersistentTrackID_Invalid))
let audioCompositionTrack = mixComposition.addMutableTrack(withMediaType: .audio, preferredTrackID: Int32(kCMPersistentTrackID_Invalid))
var count = 0
var insertTime = CMTime.zero
var instructions = [AVMutableVideoCompositionInstruction]()
for videoAsset in arrOfAssets {
let audioTrack = videoAsset.tracks(withMediaType: .audio)[0]
do {
try videoCompositionTrack?.insertTimeRange(CMTimeRangeMake(start: .zero, duration: videoAsset.duration), of: videoAsset.tracks(withMediaType: .video)[0], at: insertTime)
try audioCompositionTrack?.insertTimeRange(CMTimeRangeMake(start: .zero, duration: videoAsset.duration), of: audioTrack, at: insertTime)
let layerInstruction = videoCompositionInstruction(videoCompositionTrack!, asset: videoAsset, count: count)
let videoCompositionInstruction = AVMutableVideoCompositionInstruction()
videoCompositionInstruction.timeRange = CMTimeRangeMake(start: insertTime, duration: videoAsset.duration)
videoCompositionInstruction.layerInstructions = [layerInstruction]
instructions.append(videoCompositionInstruction)
insertTime = CMTimeAdd(insertTime, videoAsset.duration)
count += 1
} catch { }
}
let videoComposition = AVMutableVideoComposition()
videoComposition.instructions = instructions
videoComposition.frameDuration = CMTimeMake(value: 1, timescale: 30)
videoComposition.renderSize = CGSize(width: UIScreen.main.bounds.width, height: UIScreen.main.bounds.height)
// ...
exporter.videoComposition = videoComposition

AVMutableVideoCompositionLayerInstruction:

func videoCompositionInstruction(_ track: AVCompositionTrack, asset: AVAsset, count: Int) -> AVMutableVideoCompositionLayerInstruction {
let instruction = AVMutableVideoCompositionLayerInstruction(assetTrack: track)
let assetTrack = asset.tracks(withMediaType: .video)[0]
let transform = assetTrack.preferredTransform
let assetInfo = orientationFromTransform(transform)
var scaleToFitRatio = UIScreen.main.bounds.width / assetTrack.naturalSize.width
if assetInfo.isPortrait {
scaleToFitRatio = UIScreen.main.bounds.width / assetTrack.naturalSize.height
let scaleFactor = CGAffineTransform(scaleX: scaleToFitRatio, y: scaleToFitRatio)
instruction.setTransform(assetTrack.preferredTransform.concatenating(scaleFactor), at: .zero)
} else {
let scaleFactor = CGAffineTransform(scaleX: scaleToFitRatio, y: scaleToFitRatio)
var concat = assetTrack.preferredTransform.concatenating(scaleFactor)
.concatenating(CGAffineTransform(translationX: 0,y: UIScreen.main.bounds.width / 2))
if assetInfo.orientation == .down {
let fixUpsideDown = CGAffineTransform(rotationAngle: CGFloat(Double.pi))
let windowBounds = UIScreen.main.bounds
let yFix = assetTrack.naturalSize.height + windowBounds.height
let centerFix = CGAffineTransform(translationX: assetTrack.naturalSize.width, y: yFix)
concat = fixUpsideDown.concatenating(centerFix).concatenating(scaleFactor)
}
instruction.setTransform(concat, at: .zero)
}
if count == 0 {
instruction.setOpacity(0.0, at: asset.duration)
}
return instruction
}

Orientation:

func orientationFromTransform(_ transform: CGAffineTransform) -> (orientation: UIImage.Orientation, isPortrait: Bool) {
var assetOrientation = UIImage.Orientation.up
var isPortrait = false
let tfA = transform.a
let tfB = transform.b
let tfC = transform.c
let tfD = transform.d
if tfA == 0 && tfB == 1.0 && tfC == -1.0 && tfD == 0 {
assetOrientation = .right
isPortrait = true
} else if tfA == 0 && tfB == -1.0 && tfC == 1.0 && tfD == 0 {
assetOrientation = .left
isPortrait = true
} else if tfA == 1.0 && tfB == 0 && tfC == 0 && tfD == 1.0 {
assetOrientation = .up
} else if tfA == -1.0 && tfB == 0 && tfC == 0 && tfD == -1.0 {
assetOrientation = .down
}
return (assetOrientation, isPortrait)
}

I got it working for both portrait and landscape.

I tested this answer with videos recorded in portrait, landscape left/right, upside down, front camera, and the back camera. I haven't had any issues. I'm far from a CGAffineTransform expert, so if anyone has a better answer please post it.

Ray Wenderlich's merging code works, but it doesn't work for videos with different orientations. I used this answer to check the properties of the preferredTransform for the orientation check.

One thing to point out is the comments from DonMag told me about the benefit of using 720x1280. The code below will merge all of the videos together with a renderSize of 720x1280 which will keep them the same size.

code:

// class property
let renderSize = CGSize(width: 720, height: 1280) // for higher quality use CGSize(width: 1080, height: 1920)
func mergVideos() {
let mixComposition = AVMutableComposition()
let videoCompositionTrack = mixComposition.addMutableTrack(withMediaType: .video, preferredTrackID: Int32(kCMPersistentTrackID_Invalid))
let audioCompositionTrack = mixComposition.addMutableTrack(withMediaType: .audio, preferredTrackID: Int32(kCMPersistentTrackID_Invalid))
var count = 0
var insertTime = CMTime.zero
var instructions = [AVMutableVideoCompositionInstruction]()
for videoAsset in arrOfAssets {
guard let firstTrack = videoAsset.tracks.first, let _ = videoAsset.tracks(withMediaType: .video).first else { continue }
do {
try videoCompositionTrack?.insertTimeRange(CMTimeRangeMake(start: .zero, duration: videoAsset.duration), of: videoAsset.tracks(withMediaType: .video)[0], at: insertTime)
if let audioTrack = videoAsset.tracks(withMediaType: .audio).first {
try audioCompositionTrack?.insertTimeRange(CMTimeRangeMake(start: .zero, duration: videoAsset.duration), of: audioTrack, at: insertTime)
}
let layerInstruction = videoCompositionInstruction(firstTrack, asset: videoAsset, count: count)
let videoCompositionInstruction = AVMutableVideoCompositionInstruction()
videoCompositionInstruction.timeRange = CMTimeRangeMake(start: insertTime, duration: videoAsset.duration)
videoCompositionInstruction.layerInstructions = [layerInstruction]
instructions.append(videoCompositionInstruction)
insertTime = CMTimeAdd(insertTime, videoAsset.duration)
count += 1
} catch { }
}
let videoComposition = AVMutableVideoComposition()
videoComposition.instructions = instructions
videoComposition.frameDuration = CMTimeMake(value: 1, timescale: 30)
videoComposition.renderSize = self.renderSize // <--- **** IMPORTANT ****
// ...
exporter.videoComposition = videoComposition
}

Most important part of this answer that replaces the RW code:

func videoCompositionInstruction(_ firstTrack: AVAssetTrack, asset: AVAsset, count: Int) -> AVMutableVideoCompositionLayerInstruction {
let instruction = AVMutableVideoCompositionLayerInstruction(assetTrack: firstTrack)
let assetTrack = asset.tracks(withMediaType: .video)[0]
let t = assetTrack.fixedPreferredTransform // new transform fix
let assetInfo = orientationFromTransform(t)
if assetInfo.isPortrait {
let scaleToFitRatio = self.renderSize.width / assetTrack.naturalSize.height
let scaleFactor = CGAffineTransform(scaleX: scaleToFitRatio, y: scaleToFitRatio)
var finalTransform = assetTrack.fixedPreferredTransform.concatenating(scaleFactor)
// From the OP that I used for the portrait part. I haven't tested this, his words: "if video not taking entire screen and leaving some parts black - don't know when actually needed so test"
if assetInfo.orientation == .rightMirrored || assetInfo.orientation == .leftMirrored {
finalTransform = finalTransform.translatedBy(x: -transform.ty, y: 0)
}
instruction.setTransform(finalTransform, at: CMTime.zero)
} else {
let renderRect = CGRect(x: 0, y: 0, width: self.renderSize.width, height: self.renderSize.height)
let videoRect = CGRect(origin: .zero, size: assetTrack.naturalSize).applying(assetTrack.fixedPreferredTransform)
let scale = renderRect.width / videoRect.width
let transform = CGAffineTransform(scaleX: renderRect.width / videoRect.width, y: (videoRect.height * scale) / assetTrack.naturalSize.height)
let translate = CGAffineTransform(translationX: .zero, y: ((self.renderSize.height - (videoRect.height * scale))) / 2)
instruction.setTransform(assetTrack.fixedPreferredTransform.concatenating(transform).concatenating(translate), at: .zero)
}
if count == 0 {
instruction.setOpacity(0.0, at: asset.duration)
}
return instruction
}

New orientation check:

func orientationFromTransform(_ transform: CGAffineTransform) -> (orientation: UIImage.Orientation, isPortrait: Bool) {
var assetOrientation = UIImage.Orientation.up
var isPortrait = false
if transform.a == 0 && transform.b == 1.0 && transform.c == -1.0 && transform.d == 0 {
assetOrientation = .right
isPortrait = true
} else if transform.a == 0 && transform.b == 1.0 && transform.c == 1.0 && transform.d == 0 {
assetOrientation = .rightMirrored
isPortrait = true
} else if transform.a == 0 && transform.b == -1.0 && transform.c == 1.0 && transform.d == 0 {
assetOrientation = .left
isPortrait = true
} else if transform.a == 0 && transform.b == -1.0 && transform.c == -1.0 && transform.d == 0 {
assetOrientation = .leftMirrored
isPortrait = true
} else if transform.a == 1.0 && transform.b == 0 && transform.c == 0 && transform.d == 1.0 {
assetOrientation = .up
} else if transform.a == -1.0 && transform.b == 0 && transform.c == 0 && transform.d == -1.0 {
assetOrientation = .down
}
}

preferredTransform fix:

extension AVAssetTrack {
var fixedPreferredTransform: CGAffineTransform {
var t = preferredTransform
switch(t.a, t.b, t.c, t.d) {
case (1, 0, 0, 1):
t.tx = 0
t.ty = 0
case (1, 0, 0, -1):
t.tx = 0
t.ty = naturalSize.height
case (-1, 0, 0, 1):
t.tx = naturalSize.width
t.ty = 0
case (-1, 0, 0, -1):
t.tx = naturalSize.width
t.ty = naturalSize.height
case (0, -1, 1, 0):
t.tx = 0
t.ty = naturalSize.width
case (0, 1, -1, 0):
t.tx = naturalSize.height
t.ty = 0
case (0, 1, 1, 0):
t.tx = 0
t.ty = 0
case (0, -1, -1, 0):
t.tx = naturalSize.height
t.ty = naturalSize.width
default:
break
}
return t
}
}
AVMutableVideoCompositionLayerInstruction -Misalignment when Merging Videos
 
 
Q