Data storage for a Matrix struct when working with Accelerate

I have a Matrix structure as defined below for working with 2D numerical data in Accelerate. The underlying numerical data in this Matrix struct is stored as an Array.

struct Matrix<T> {
    let rows: Int
    let columns: Int
    var data: [T]

    init(rows: Int, columns: Int, fill: T) {
        self.rows = rows
        self.columns = columns
        self.data = Array(repeating: fill, count: rows * columns)
    }

    init(rows: Int, columns: Int, source: (inout UnsafeMutableBufferPointer<T>) -> Void) {
        self.rows = rows
        self.columns = columns
        self.data = Array(unsafeUninitializedCapacity: rows * columns) { buffer, initializedCount in
            source(&buffer)
            initializedCount = rows * columns
        }
    }

    subscript(row: Int, column: Int) -> T {
        get { return self.data[(row * self.columns) + column] }
        set { self.data[(row * self.columns) + column] = newValue }
    }
}

Multiplication is implemented by the functions shown below.

import Accelerate

infix operator .*

func .* (lhs: Matrix<Double>, rhs: Matrix<Double>) -> Matrix<Double> {
    precondition(lhs.rows == rhs.rows && lhs.columns == rhs.columns, "Matrices must have same dimensions")
    let result = Matrix<Double>(rows: lhs.rows, columns: rhs.columns) { buffer in
        vDSP.multiply(lhs.data, rhs.data, result: &buffer)
    }
    return result
}

func * (lhs: Matrix<Double>, rhs: Matrix<Double>) -> Matrix<Double> {
    precondition(lhs.columns == rhs.rows, "Number of columns in left matrix must equal number of rows in right matrix")
    var a = lhs.data
    var b = rhs.data

    let m = lhs.rows     // number of rows in matrices A and C
    let n = rhs.columns  // number of columns in matrices B and C
    let k = lhs.columns  // number of columns in matrix A; number of rows in matrix B
    let alpha = 1.0
    let beta = 0.0

    // matrix multiplication where C ← αAB + βC
    let c = Matrix<Double>(rows: lhs.rows, columns: rhs.columns) { buffer in
        cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, alpha, &a, k, &b, n, beta, buffer.baseAddress, n)
    }

    return c
}

I can also define a Matrix structure where the underlying data is an UnsafeMutableBufferPointer. The buffer is handled by the MatrixData class.

struct Matrix<T> {
    let rows: Int
    let columns: Int
    var data: MatrixData<T>

    init(rows: Int, columns: Int, fill: T) {
        self.rows = rows
        self.columns = columns
        self.data = MatrixData(count: rows * columns, fill: fill)
    }

    init(rows: Int, columns: Int) {
        self.rows = rows
        self.columns = columns
        self.data = MatrixData(count: rows * columns)
    }

    subscript(row: Int, column: Int) -> T {
        get { return self.data.buffer[(row * self.columns) + column] }
        set { self.data.buffer[(row * self.columns) + column] = newValue }
    }
}
class MatrixData<T> {
    var buffer: UnsafeMutableBufferPointer<T>
    var baseAddress: UnsafeMutablePointer<T> {
        get { self.buffer.baseAddress! }
    }

    init(count: Int, fill: T) {
        let start = UnsafeMutablePointer<T>.allocate(capacity: count)
        self.buffer = UnsafeMutableBufferPointer(start: start, count: count)
        self.buffer.initialize(repeating: fill)
    }

    init(count: Int) {
        let start = UnsafeMutablePointer<T>.allocate(capacity: count)
        self.buffer = UnsafeMutableBufferPointer(start: start, count: count)
    }

    deinit {
        self.buffer.deinitialize()
        self.buffer.deallocate()
    }
}

Multiplication for this approach is implemented by the functions shown here.

import Accelerate

infix operator .*

func .* (lhs: Matrix<Double>, rhs: Matrix<Double>) -> Matrix<Double> {
    precondition(lhs.rows == rhs.rows && lhs.columns == rhs.columns, "Matrices must have same dimensions")
    let result = Matrix<Double>(rows: lhs.rows, columns: lhs.columns)
    vDSP.multiply(lhs.data.buffer, rhs.data.buffer, result: &result.data.buffer)
    return result
}

func * (lhs: Matrix<Double>, rhs: Matrix<Double>) -> Matrix<Double> {
    precondition(lhs.columns == rhs.rows, "Number of columns in left matrix must equal number of rows in right matrix")
    let a = lhs.data.baseAddress
    let b = rhs.data.baseAddress

    let m = lhs.rows     // number of rows in matrices A and C
    let n = rhs.columns  // number of columns in matrices B and C
    let k = lhs.columns  // number of columns in matrix A; number of rows in matrix B
    let alpha = 1.0
    let beta = 0.0

    // matrix multiplication where C ← αAB + βC
    let c = Matrix<Double>(rows: lhs.rows, columns: rhs.columns)
    cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, alpha, a, k, b, n, beta, c.data.baseAddress, n)

    return c
}

Both of these approaches give me similar performance. The only difference that I have noticed is the matrix buffer approach allows for reference semantics. For example, the code below uses half the memory with the matrix buffer approach compared to the matrix array approach. This is because b acts as a reference to a using the matrix buffer approach; otherwise, the matrix array approach makes a full copy of a.

let n = 10_000
let a = Matrix<Double>(rows: n, columns: n, fill: 0)
var b = a
b[0, 0] = 99
b[0, 1] = 22

Other than reference semantics, are there any reasons to use one of these approaches over the other?

Aren’t we already having this conversation on Swift Forums?

Share and Enjoy

Quinn “The Eskimo!” @ Developer Technical Support @ Apple
let myEmail = "eskimo" + "1" + "@" + "apple.com"

It might be worth looking at an operation such as *= where you need to start using withUnsafeMutableBufferPointer for your array based solution.

I would suggest that basing the matrix on an UnsafeMutableBufferPointer will yield better performance, but you are responsible for validating inputs and deallocating data. If you use this approach, you have stable, contiguous memory and there's no copying to temporary buffers when passing your matrix data to BLAS.

There's a great WWDC video at https://developer.apple.com/videos/play/wwdc2020/10648/ which discusses the pros and cons of Swift's unsafe pointer APIs.

I would also like to mention that running the code below gives me almost identical elapsed times for the matrix array and matrix buffer solutions. So, at least for this case, I'm not seeing any performance differences between the two approaches.

func runBenchmark1() {
    print("Benchmark matrix multiplication")
    for _ in 1...3 {
        let tic = Date.now

        let n = 8_000
        let a = Matrix<Double>(rows: n, columns: n, fill: 1.5)
        let b = Matrix<Double>(rows: n, columns: n, fill: 2.8)
        let c = a * b

        let toc = tic.timeIntervalSinceNow.magnitude
        let elapsed = String(format: "%.4f", toc)
        print("Elapsed time is \(elapsed) sec, first element is \(c[0, 0])")
    }
}
Data storage for a Matrix struct when working with Accelerate
 
 
Q