Update an already saved model

I followed the video of Composing advanced models with Create ML Components.

I have created the model with

let urlParameters = URL(fileURLWithPath: "/path/to/model.pkg")

let (training, validation) = dataFrame.randomSplit(by: 0.8)

let model = try await transformer.fitted(to: DataFrame(training), validateOn: DataFrame(validation)) { event in
    guard let tAccuracy = event.metrics[.trainingAccuracy] as? Double else { return }
    print(tAccuracy)
}

try transformer.write(model, to: urlParameters)
print("done")

The next goal is to read the model and update it with a new dataFrame.

let urlCSV = URL(fileURLWithPath: "path/to/newData.csv")

var model = try transformer.read(from: urlParameters) // loading created model

let newDataFrame = try DataFrame(contentsOfCSVFile: urlCSV ) // new dataFrame with features and annotations

try await transformer.update(&model, with: newDataFrame) // I want to keep the previously learned data and update the model with the new data

try transformer.write(model, to: urlParameters) // the model saves, but only the last added dataFrame is kept. The previous one is just replaced by the new one

But it looks like I am only replacing the old data with the new data.

**The Question:** How can I add new data to the model I created without losing the old data?

Replies

What kind of model are you training? When you update the model, the existing trained weights are used as a starting point. But, depending on the model, if the old data is no longer available the new data may completely overwrite the previous weights. This is particularly true for linear models. I would suggest either saving some of the original data and mixing it in, or using a different model kind such as a fully-connected estimator.

  • Thank you for your answer

    It is actually a fully-connected estimator. I have added the additional code below.

Add a Comment
/// One labeled training example: a class annotation plus its numeric feature vector.
/// `Codable` so batches can be round-tripped through JSON into a `DataFrame`
/// (see `toDataFrameShapedArray(vectors:)`).
struct VectorData: Codable {
    // Class annotation; becomes the "label" column consumed by the transformer.
    let label: String
    // Raw feature values; becomes the "features" column, later converted to
    // MLShapedArray<Float> before training.
    let features: [Double]
}

/// Incrementally trains the previously saved classifier on new vectors,
/// preserving earlier learned weights by reading the stored model first.
///
/// - Parameters:
///   - vectors: New labeled feature vectors to train on.
///   - labels: The complete set of class labels the classifier can predict.
///   - epochs: Number of passes over the training data. Defaults to 10
///     (the previously hard-coded value).
///   - batchSize: Number of vectors per update batch. Defaults to 10
///     (the previously hard-coded value).
/// - Throws: Any error from reading, updating, or writing the model.
func learn(vectors: [VectorData], labels: Set<String>, epochs: Int = 10, batchSize: Int = 10) async throws {

    // `let` instead of `var`: the classifier is never mutated here, only
    // consumed by `appending(_:)`.
    let classifier = FullyConnectedNetworkClassifier<Float, String>(labels: labels)

    let transformer = AnnotatedFeatureProvider(
        OptionalUnwrapper<MLShapedArray<Float>>().appending(classifier),
        annotationsColumnName: "label",
        featuresColumnName: "features",
        resultsColumnName: "result"
    )

    // NOTE(review): the validation split is currently discarded — consider
    // passing it to the update/fit call if the API supports it.
    let (training, _) = vectors.randomSplit(by: 0.8)

    // Start from the previously saved parameters so earlier training is retained.
    var model = try transformer.read(from: urlParameters)

    for epoch in 0..<epochs {
        // Re-shuffle every epoch so batch composition varies between passes.
        let batches = training.shuffled().chunks(ofCount: batchSize)
        for batch in batches {
            let dataFrame = toDataFrameShapedArray(vectors: Array(batch))
            try await transformer.update(&model, with: dataFrame)
        }
        // Persist after every epoch so progress survives interruption.
        try transformer.write(model, to: urlParameters)
        print("iteration index : \(epoch) - pkg saved, coreML saved")
    }
}

/// Builds a `DataFrame` from the given vectors, then rewrites the "features"
/// column from JSON arrays into the `MLShapedArray<Float>` values the
/// classifier expects.
///
/// - Parameter vectors: Labeled examples to tabulate.
/// - Returns: The populated frame, or an empty `DataFrame` if any step fails.
private func toDataFrameShapedArray(vectors: [VectorData]) -> DataFrame {
    var frame = DataFrame()
    do {
        // Round-trip through JSON so DataFrame can infer the columns.
        let jsonData = try JSONEncoder().encode(vectors)
        frame = try DataFrame(jsonData: jsonData)

        frame.transformColumn("features") { (raw: Array<Any?>) in
            let floats = Tabla.shared.toFloat(optionalArray: raw) // [Any?] -> [Float]
            let multi = Tabla.shared.from(floats)                 // [Float] -> 1-D MultiArray
            return MLShapedArray<Float>(multi)                    // MultiArray -> MLShapedArray<Float>
        }
    } catch {
        // NOTE(review): the error is swallowed and an empty frame is returned
        // (behavior kept as-is) — consider making this `throws` so callers can react.
        print(error)
    }

    return frame
}