Functions
FASTCV_API int32_t	fcvDotProducts8 (const int8_t __restrict a, const int8_t __restrict b, unsigned int abSize)
	Dot product of two 8-bit vectors.
FASTCV_API uint32_t	fcvDotProductu8 (const uint8_t __restrict a, const uint8_t __restrict b, unsigned int abSize)
	Dot product of two 8-bit vectors.
FASTCV_API int32_t	fcvDotProduct36x1s8 (const int8_t __restrict a, const int8_t __restrict b)
	Dot product of two 36-byte vectors.
FASTCV_API void	fcvDotProduct36x4s8 (const int8_t __restrict a, const int8_t __restrict b, const int8_t __restrict c, const int8_t __restrict d, const int8_t __restrict e, int32_t __restrict dotProducts)
	Dot product of one 36-byte vector against 4 others.
FASTCV_API void	fcvDotProductNorm36x4s8 (const int8_t __restrict a, float invLengthA, const int8_t __restrict b0, const int8_t __restrict b1, const int8_t __restrict b2, const int8_t __restrict b3, float __restrict invLengthsB, float *__restrict dotProducts)
	Normalized dot product of one 36-byte vector against 4 others.
FASTCV_API uint32_t	fcvDotProduct36x1u8 (const uint8_t __restrict a, const uint8_t __restrict b)
	Dot product of two 36-byte vectors.
FASTCV_API void	fcvDotProduct36x4u8 (const uint8_t __restrict a, const uint8_t __restrict b, const uint8_t __restrict c, const uint8_t __restrict d, const uint8_t __restrict e, uint32_t __restrict dotProducts)
	Dot product of one 36-byte vector against 4 others.
FASTCV_API void	fcvDotProductNorm36x4u8 (const uint8_t __restrict a, float invLengthA, const uint8_t __restrict b0, const uint8_t __restrict b1, const uint8_t __restrict b2, const uint8_t __restrict b3, float __restrict invLengthsB, float *__restrict dotProducts)
	Normalized dot product of one 36-byte vector against 4 others.
FASTCV_API int32_t	fcvDotProduct64x1s8 (const int8_t __restrict a, const int8_t __restrict b)
	Dot product of two 64-byte vectors.
FASTCV_API void	fcvDotProduct64x4s8 (const int8_t __restrict a, const int8_t __restrict b, const int8_t __restrict c, const int8_t __restrict d, const int8_t __restrict e, int32_t __restrict dotProducts)
	Dot product of one 64-byte vector against 4 others.
FASTCV_API void	fcvDotProductNorm64x4s8 (const int8_t __restrict a, float invLengthA, const int8_t __restrict b0, const int8_t __restrict b1, const int8_t __restrict b2, const int8_t __restrict b3, float __restrict invLengthsB, float *__restrict dotProducts)
	Normalized dot product of one 64-byte vector against 4 others.
FASTCV_API uint32_t	fcvDotProduct64x1u8 (const uint8_t __restrict a, const uint8_t __restrict b)
	Dot product of two 64-byte vectors.
FASTCV_API void	fcvDotProduct64x4u8 (const uint8_t __restrict a, const uint8_t __restrict b, const uint8_t __restrict c, const uint8_t __restrict d, const uint8_t __restrict e, uint32_t __restrict dotProducts)
	Dot product of one 64-byte vector against 4 others.
FASTCV_API void	fcvDotProductNorm64x4u8 (const uint8_t __restrict a, float invLengthA, const uint8_t __restrict b0, const uint8_t __restrict b1, const uint8_t __restrict b2, const uint8_t __restrict b3, float __restrict invLengthsB, float *__restrict dotProducts)
	Normalized dot product of one 64-byte vector against 4 others.
FASTCV_API int32_t	fcvDotProduct128x1s8 (const int8_t __restrict a, const int8_t __restrict b)
	Dot product of two 128-byte vectors.
FASTCV_API void	fcvDotProduct128x4s8 (const int8_t __restrict a, const int8_t __restrict b, const int8_t __restrict c, const int8_t __restrict d, const int8_t __restrict e, int32_t __restrict dotProducts)
	Dot product of one 128-byte vector against 4 others.
FASTCV_API void	fcvDotProductNorm128x4s8 (const int8_t __restrict a, float invLengthA, const int8_t __restrict b0, const int8_t __restrict b1, const int8_t __restrict b2, const int8_t __restrict b3, float __restrict invLengthsB, float *__restrict dotProducts)
	Normalized dot product of one 128-byte vector against 4 others.
FASTCV_API uint32_t	fcvDotProduct128x1u8 (const uint8_t __restrict a, const uint8_t __restrict b)
	Dot product of two 128-byte vectors.
FASTCV_API void	fcvDotProduct128x4u8 (const uint8_t __restrict a, const uint8_t __restrict b, const uint8_t __restrict c, const uint8_t __restrict d, const uint8_t __restrict e, uint32_t __restrict dotProducts)
	Dot product of one 128-byte vector against 4 others.
FASTCV_API void	fcvDotProductNorm128x4u8 (const uint8_t __restrict a, float invLengthA, const uint8_t __restrict b0, const uint8_t __restrict b1, const uint8_t __restrict b2, const uint8_t __restrict b3, float __restrict invLengthsB, float *__restrict dotProducts)
	Normalized dot product of one 128-byte vector against 4 others.
FASTCV_API void	fcvDotProduct8x8u8 (const uint8_t __restrict patchPixels, const uint8_t __restrict imagePixels, unsigned short imgW, unsigned short imgH, int nX, int nY, unsigned int nNum, int32_t *__restrict dotProducts)
	Dot product of 1 patch (8x8 byte square) with several (n) 8x8 squares along a line of pixels in an image.
FASTCV_API void	fcvDotProduct11x12u8 (const uint8_t __restrict patchPixels, const uint8_t __restrict imagePixels, unsigned short imgW, unsigned short imgH, int iX, int iY, int32_t *__restrict dotProducts)
	Dot product of 1 patch (8x8 byte square) with 8x8 squares in 11x12 rectangle around the center search pixel (iX,iY).
FASTCV_API int	fcvVecNormalize36s8f32 (const int8_t *__restrict src, unsigned int srcStride, const float *__restrict invLen, unsigned int numVecs, float reqNorm, float *__restrict dst, int32_t *stopBuild)
	Translate to float and normalize 36 8-bit elements.
FASTCV_API void	fcvSumOfSquaredDiffs36x4s8 (const int8_t __restrict a, float invLenA, const int8_t __restrict b0, const int8_t __restrict b1, const int8_t __restrict b2, const int8_t __restrict b3, const float __restrict invLenB, float *__restrict distances)
	Sum of squared differences of one 36-byte vector against 4 others.
FASTCV_API void	fcvSumOfSquaredDiffs36xNs8 (const int8_t *__restrict a, float invLenA, const int8_t *const *__restrict b, const float *__restrict invLenB, unsigned int numB, float *__restrict distances)
	Sum of squared differences of one 36-byte vector against N others.
FASTCV_API void	fcvSort8Scoresf32 (float __restrict inScores, float __restrict outScores)
	Sorting of 8 float numbers.
FASTCV_API uint32_t	fcvBitCountu8 (const uint8_t *__restrict src, unsigned int srcLength)
	Counts "1" bits in supplied vector.
FASTCV_API uint32_t	fcvBitCount32x1u8 (const uint8_t *__restrict src)
	Counts "1" bits in supplied 32-byte vector.
FASTCV_API void	fcvBitCount32x4u8 (const uint8_t __restrict a, const uint8_t __restrict b, const uint8_t __restrict c, const uint8_t __restrict d, uint32_t *__restrict bitCount)
	Counts bits in supplied 4, 32-byte vectors.
FASTCV_API uint32_t	fcvBitCount64x1u8 (const uint8_t *__restrict src)
	Counts bits in supplied 64-byte vector.
FASTCV_API void	fcvBitCount64x4u8 (const uint8_t __restrict a, const uint8_t __restrict b, const uint8_t __restrict c, const uint8_t __restrict d, uint32_t *__restrict bitCount)
	Counts bits in supplied 4, 64-byte vectors.
FASTCV_API uint32_t	fcvBitCountu32 (const uint32_t *__restrict src, unsigned int srcLength)
	Counts bits in supplied vector of unsigned integers.
FASTCV_API uint32_t	fcvHammingDistanceu8 (const uint8_t __restrict a, const uint8_t __restrict b, unsigned int abLength)
	Computes the Hamming distance between the two supplied arbitrary length vectors.
FASTCV_API uint32_t	fcvHammingDistance32x1u8a4 (const uint8_t __restrict a, const uint8_t __restrict b)
	Computes the Hamming distance between the two supplied 32-byte vectors.
FASTCV_API uint32_t	fcvHammingDistance64x1u8a4 (const uint8_t __restrict a, const uint8_t __restrict b)
	Computes the Hamming distance between the two supplied 64-byte vectors.
FASTCV_API uint32_t	fcvHammingDistance32x1u8 (const uint8_t __restrict a, const uint8_t __restrict b)
	Computes the Hamming distance between the two supplied 32-byte vectors.
FASTCV_API uint32_t	fcvHammingDistance64x1u8 (const uint8_t __restrict a, const uint8_t __restrict b)
	Computes the Hamming distance between the two supplied 64-byte vectors.
FASTCV_API void	fcvHammingDistance32x4u8a4 (const uint8_t __restrict a, const uint8_t __restrict b, const uint8_t __restrict c, const uint8_t __restrict d, const uint8_t __restrict e, uint32_t __restrict hammingDistances)
	Computes the Hamming distance between A and each of B,C,D,E 32-byte vectors.
FASTCV_API void	fcvHammingDistance64x4u8a4 (const uint8_t __restrict a, const uint8_t __restrict b, const uint8_t __restrict c, const uint8_t __restrict d, const uint8_t __restrict e, uint32_t __restrict hammingDistances)
	Computes the Hamming distance between A and each of B,C,D,E 64-byte vectors.
FASTCV_API void	fcvHammingDistance64x4u8 (const uint8_t __restrict a, const uint8_t __restrict b, const uint8_t __restrict c, const uint8_t __restrict d, const uint8_t __restrict e, uint32_t __restrict hammingDistances)
	Computes the Hamming distance between A and each of B,C,D,E 64-byte vectors.
FASTCV_API void	fcvSolvef32 (const float32_t __restrict A, int32_t numCols, int32_t numRows, const float32_t __restrict b, float32_t *__restrict x)
	Solve linear equation system Ax = b.
FASTCV_API void	fcvSetElementsu8 (uint8_t __restrict dst, uint32_t dstWidth, uint32_t dstHeight, uint32_t dstStride, uint8_t value, const uint8_t __restrict mask, uint32_t maskStride)
	Sets every element of a uint8_t single channel array to a given value.
FASTCV_API void	fcvSetElementss32 (int32_t __restrict dst, uint32_t dstWidth, uint32_t dstHeight, uint32_t dstStride, int32_t value, const uint8_t __restrict mask, uint32_t maskStride)
	Sets every element of an int32_t single channel array to a given value.
FASTCV_API void	fcvSetElementsf32 (float32_t __restrict dst, uint32_t dstWidth, uint32_t dstHeight, uint32_t dstStride, float32_t value, const uint8_t __restrict mask, uint32_t maskStride)
	Sets every element of a float32_t single channel array to a given value.
FASTCV_API void	fcvSetElementsc4u8 (uint8_t __restrict dst, uint32_t dstWidth, uint32_t dstHeight, uint32_t dstStride, uint8_t value1, uint8_t value2, uint8_t value3, uint8_t value4, const uint8_t __restrict mask, uint32_t maskStride)
	Sets every element of a uint8_t 4-channel array to a given 4-element scalar.
FASTCV_API void	fcvSetElementsc4s32 (int32_t __restrict dst, uint32_t dstWidth, uint32_t dstHeight, uint32_t dstStride, int32_t value1, int32_t value2, int32_t value3, int32_t value4, const uint8_t __restrict mask, uint32_t maskStride)
	Sets every element of an int32_t 4-channel array to a given 4-element scalar.
FASTCV_API void	fcvSetElementsc4f32 (float32_t __restrict dst, uint32_t dstWidth, uint32_t dstHeight, uint32_t dstStride, float32_t value1, float32_t value2, float32_t value3, float32_t value4, const uint8_t __restrict mask, uint32_t maskStride)
	Sets every element of a float32_t 4-channel array to a given 4-element scalar.
FASTCV_API void	fcvSetElementsc3u8 (uint8_t __restrict dst, uint32_t dstWidth, uint32_t dstHeight, uint32_t dstStride, uint8_t value1, uint8_t value2, uint8_t value3, const uint8_t __restrict mask, uint32_t maskStride)
	Sets every element of a uint8_t 3-channel array to a given 3-element scalar.
FASTCV_API void	fcvSetElementsc3s32 (int32_t __restrict dst, uint32_t dstWidth, uint32_t dstHeight, uint32_t dstStride, int32_t value1, int32_t value2, int32_t value3, const uint8_t __restrict mask, uint32_t maskStride)
	Sets every element of an int32_t 3-channel array to a given 3-element scalar.
FASTCV_API void	fcvSetElementsc3f32 (float32_t __restrict dst, uint32_t dstWidth, uint32_t dstHeight, uint32_t dstStride, float32_t value1, float32_t value2, float32_t value3, const uint8_t __restrict mask, uint32_t maskStride)
	Sets every element of a float32_t 3-channel array to a given 3-element scalar.
FASTCV_API void	fcvAbsDiffu8 (const uint8_t __restrict src1, const uint8_t __restrict src2, uint32_t srcWidth, uint32_t srcHeight, uint32_t srcStride, uint8_t *__restrict dst, uint32_t dstStride)
	Computes the per-element absolute difference between two uint8_t matrices.
FASTCV_API void	fcvAbsDiffs32 (const int32_t __restrict src1, const int32_t __restrict src2, uint32_t srcWidth, uint32_t srcHeight, uint32_t srcStride, int32_t *__restrict dst, uint32_t dstStride)
	Computes the per-element absolute difference between two int32_t matrices.
FASTCV_API void	fcvAbsDifff32 (const float32_t __restrict src1, const float32_t __restrict src2, uint32_t srcWidth, uint32_t srcHeight, uint32_t srcStride, float32_t *__restrict dst, uint32_t dstStride)
	Computes the per-element absolute difference between two float32_t matrices.
FASTCV_API void	fcvAbsDiffVu8 (const uint8_t __restrict src, uint8_t value, uint32_t srcWidth, uint32_t srcHeight, uint32_t srcStride, uint8_t __restrict dst, uint32_t dstStride)
	Computes the per-element absolute difference between one matrix and one value.
FASTCV_API void	fcvAbsDiffVs32 (const int32_t __restrict src, int32_t value, uint32_t srcWidth, uint32_t srcHeight, uint32_t srcStride, int32_t __restrict dst, uint32_t dstStride)
	Computes the per-element absolute difference between one matrix and one value.
FASTCV_API void	fcvAbsDiffVf32 (const float32_t __restrict src, float32_t value, uint32_t srcWidth, uint32_t srcHeight, uint32_t srcStride, float32_t __restrict dst, uint32_t dstStride)
	Computes the per-element absolute difference between one matrix and one value.
FASTCV_API void	fcvAbsDiffVc4u8 (const uint8_t __restrict src, uint8_t value1, uint8_t value2, uint8_t value3, uint8_t value4, uint32_t srcWidth, uint32_t srcHeight, uint32_t srcStride, uint8_t __restrict dst, uint32_t dstStride)
	Computes the per-element absolute difference between one 4-channel matrix and a 4-element Scalar.
FASTCV_API void	fcvAbsDiffVc4s32 (const int32_t __restrict src, int32_t value1, int32_t value2, int32_t value3, int32_t value4, uint32_t srcWidth, uint32_t srcHeight, uint32_t srcStride, int32_t __restrict dst, uint32_t dstStride)
	Computes the per-element absolute difference between one 4-channel matrix and a 4-element Scalar.
FASTCV_API void	fcvAbsDiffVc4f32 (const float32_t __restrict src, float32_t value1, float32_t value2, float32_t value3, float32_t value4, uint32_t srcWidth, uint32_t srcHeight, uint32_t srcStride, float32_t __restrict dst, uint32_t dstStride)
	Computes the per-element absolute difference between one 4-channel matrix and a 4-element Scalar.
FASTCV_API void	fcvAbsDiffVc3u8 (const uint8_t __restrict src, uint8_t value1, uint8_t value2, uint8_t value3, uint32_t srcWidth, uint32_t srcHeight, uint32_t srcStride, uint8_t __restrict dst, uint32_t dstStride)
	Computes the per-element absolute difference between one 3-channel matrix and a 3-element Scalar.
FASTCV_API void	fcvAbsDiffVc3s32 (const int32_t __restrict src, int32_t value1, int32_t value2, int32_t value3, uint32_t srcWidth, uint32_t srcHeight, uint32_t srcStride, int32_t __restrict dst, uint32_t dstStride)
	Computes the per-element absolute difference between one 3-channel matrix and a 3-element Scalar.
FASTCV_API void	fcvAbsDiffVc3f32 (const float32_t __restrict src, float32_t value1, float32_t value2, float32_t value3, uint32_t srcWidth, uint32_t srcHeight, uint32_t srcStride, float32_t __restrict dst, uint32_t dstStride)
	Computes the per-element absolute difference between one 3-channel matrix and a 3-element Scalar.
FASTCV_API void	fcvSVDf32 (const float32_t *__restrict A, uint32_t m, uint32_t n, float32_t *__restrict w, float32_t *__restrict U, float32_t *__restrict Vt, float32_t *tmpU, float32_t *tmpV)
	Computes a singular value decomposition of a float-type matrix, A = U*diag[w]*Vt. It is used for solving problems like least-squares, under-determined linear systems, matrix inversion and so forth. The algorithm used here does not compute the full U and V matrices; however, it computes a condensed version of U and V, described below, which is sufficient to solve most problems which use SVD.
FASTCV_API int32_t	fcvSolveCholeskyf32 (float32_t __restrict A, const float32_t __restrict b, float32_t __restrict diag, uint32_t N, float32_t __restrict x)
	Executes Cholesky decomposition algorithm on a symmetric and positive definite matrix to solve the linear system A*x = b, where A is an NxN matrix and x & b are vectors of size N.
FASTCV_API void	fcvJacobianSE2f32 (const uint8_t __restrict warpedImage, const uint16_t __restrict warpedBorder, const uint8_t __restrict targetImage, const int16_t __restrict targetDX, const int16_t __restrict targetDY, uint32_t width, uint32_t height, uint32_t stride, float32_t __restrict sumJTJ, float32_t __restrict sumJTE, float32_t __restrict sumError, uint32_t *__restrict numPixels)
	Calculates JTJ, JTE and the sum absolute, normalized pixel differences for a target image and a reference image of same size for an SE2 image motion model. Since gradients are required for this algorithm all border pixels in referenceImage and targetImage are ignored. NOTE: Only works for images with even width and height.
FASTCV_API int32_t	fcvSolveLDLf32 (float32_t __restrict A, const float32_t __restrict b, float32_t __restrict diag, uint32_t N, float32_t __restrict x)
	Executes LDL decomposition algorithm on a symmetric and positive definite matrix to solve the linear system A*x = b, where A is an NxN matrix and x & b are vectors of size N.
FASTCV_API float32_t	fcvDotProductf32 (const float32_t __restrict a, const float32_t __restrict b, uint32_t N)
	Executes dot product of two floating point vectors.
FASTCV_API void	fcvSumOfSquaredDiffsu8 (const uint8_t *__restrict a, float32_t invLenA, uint32_t dim, const uint8_t *const *__restrict bList, const float32_t *__restrict invLenB, uint32_t numB, float32_t *__restrict distances)
	Sum of squared differences of one L-byte vector against N others.
FASTCV_API void	fcvSumOfSquaredDiffsf32 (const float32_t *__restrict a, float32_t invLenA, uint32_t dim, const float32_t *const *__restrict bList, const float32_t *__restrict invLenB, uint32_t numB, float32_t *__restrict distances)
	Sum of squared differences of one floating vector of L-elements against N others.
FASTCV_API void	fcvTransposeu8 (const uint8_t __restrict src, uint32_t srcWidth, uint32_t srcHeight, uint32_t srcStride, uint8_t __restrict dst, uint32_t dstStride)
	Matrix transpose of one uint8_t type matrix.
FASTCV_API void	fcvTransposeu16 (const uint16_t __restrict src, uint32_t srcWidth, uint32_t srcHeight, uint32_t srcStride, uint16_t __restrict dst, uint32_t dstStride)
	Matrix transpose of one uint16_t type matrix.
FASTCV_API void	fcvTransposef32 (const float32_t __restrict src, uint32_t srcWidth, uint32_t srcHeight, uint32_t srcStride, float32_t __restrict dst, uint32_t dstStride)
	Matrix transpose of one float32_t type matrix.
FASTCV_API void	fcvFlipu8 (const uint8_t *src, uint32_t srcWidth, uint32_t srcHeight, uint32_t srcStride, uint8_t *dst, uint32_t dstStride, fcvFlipDir dir)
	Flip one uint8_t type matrix. If src and dst point to the same address and srcStride equals dstStride, the flip is done in place.
FASTCV_API void	fcvFlipu16 (const uint16_t *src, uint32_t srcWidth, uint32_t srcHeight, uint32_t srcStride, uint16_t *dst, uint32_t dstStride, fcvFlipDir dir)
	Flip one uint16_t type matrix. If src and dst point to the same address and srcStride equals dstStride, the flip is done in place.
FASTCV_API fcvStatus	fcvFlipRGB888u8 (const uint8_t *src, uint32_t srcWidth, uint32_t srcHeight, uint32_t srcStride, uint8_t *dst, uint32_t dstStride, fcvFlipDir dir)
	Flips an interleaved RGB image.
FASTCV_API fcvStatus	fcvRotateImageu8 (const uint8_t *src, uint32_t srcWidth, uint32_t srcHeight, uint32_t srcStride, uint8_t *dst, uint32_t dstStride, fcvRotateDegree degree)
	Rotate one uint8_t type image.
FASTCV_API fcvStatus	fcvRotateImageInterleavedu8 (const uint8_t *src, uint32_t srcWidth, uint32_t srcHeight, uint32_t srcStride, uint8_t *dst, uint32_t dstStride, fcvRotateDegree degree)
	Rotate one interleaved uint8_t type image (e.g. UV channel in NV21).
FASTCV_API void	fcvElementMultiplyu8u16 (const uint8_t *src1, uint32_t width, uint32_t height, uint32_t src1Stride, const uint8_t *src2, uint32_t src2Stride, uint16_t *__restrict dst, uint32_t dstStride)
	Element-wise multiplication of two uint8_t type matrices.
FASTCV_API void	fcvElementMultiplyf32 (const float32_t *src1, uint32_t width, uint32_t height, uint32_t src1Stride, const float32_t *src2, uint32_t src2Stride, float32_t *__restrict dst, uint32_t dstStride)
	Element-wise multiplication of two float32_t type matrices.
FASTCV_API void	fcvMatrixMultiplys8s32 (const int8_t __restrict src1, uint32_t src1Width, uint32_t src1Height, uint32_t src1Stride, const int8_t __restrict src2, uint32_t src2Width, uint32_t src2Stride, int32_t *__restrict dst, uint32_t dstStride)
	Matrix multiplication of two int8_t type matrices.
FASTCV_API void	fcvMatrixMultiplyf32 (const float32_t __restrict src1, uint32_t src1Width, uint32_t src1Height, uint32_t src1Stride, const float32_t __restrict src2, uint32_t src2Width, uint32_t src2Stride, float32_t *__restrict dst, uint32_t dstStride)
	Matrix multiplication of two float32_t type matrices.
FASTCV_API uint32_t	fcvBlockDotProductu8 (const uint8_t __restrict src1, uint32_t blockWidth, uint32_t blockHeight, uint32_t src1Stride, const uint8_t __restrict src2, uint32_t src2Stride)
	Dot product of two uint8_t type blocks.
FASTCV_API float32_t	fcvBlockDotProductf32 (const float32_t __restrict src1, uint32_t blockWidth, uint32_t blockHeight, uint32_t src1Stride, const float32_t __restrict src2, uint32_t src2Stride)
	Dot product of two float32_t type blocks.
FASTCV_API void	fcvAddu8u16 (const uint8_t __restrict src1, uint32_t width, uint32_t height, uint32_t src1Stride, const uint8_t __restrict src2, uint32_t src2Stride, uint16_t *__restrict dst, uint32_t dstStride)
	Matrix addition of two uint8_t type matrices.
FASTCV_API void	fcvAdds16 (const int16_t __restrict src1, uint32_t width, uint32_t height, uint32_t src1Stride, const int16_t __restrict src2, uint32_t src2Stride, int16_t *__restrict dst, uint32_t dstStride)
	Matrix addition of two int16_t type matrices with saturation.
FASTCV_API void	fcvAddf32 (const float32_t __restrict src1, uint32_t width, uint32_t height, uint32_t src1Stride, const float32_t __restrict src2, uint32_t src2Stride, float32_t *__restrict dst, uint32_t dstStride)
	Matrix addition of two float32_t type matrices.
FASTCV_API void	fcvSumOfSquaredDiffss8 (const int8_t *__restrict a, float32_t invLenA, uint32_t dim, const int8_t *const *__restrict bList, const float32_t *__restrict invLenB, uint32_t numB, float32_t *__restrict distances)
	Sum of squared differences of one L-byte vector against N others.
FASTCV_API fcvStatus	fcvAddScalarf32 (const float32_t __restrict src, uint32_t srcWidth, uint32_t srcHeight, uint32_t srcStride, float32_t scalar, float32_t __restrict dst, uint32_t dstStride)
	Adds a scalar value to every element of a Matrix.
FASTCV_API fcvStatus	fcvAddScalars16 (const int16_t __restrict src, uint32_t srcWidth, uint32_t srcHeight, uint32_t srcStride, int16_t scalar, int16_t __restrict dst, uint32_t dstStride)
	Adds a scalar value to every element of a Matrix.
FASTCV_API fcvStatus	fcvMultiplyScalarf32 (const float32_t __restrict src, uint32_t srcWidth, uint32_t srcHeight, uint32_t srcStride, float32_t scalar, float32_t __restrict dst, uint32_t dstStride)
	Multiplies a scalar value to every element of a Matrix.
FASTCV_API fcvStatus	fcvMultiplyScalars16 (const int16_t __restrict src, uint32_t srcWidth, uint32_t srcHeight, uint32_t srcStride, int8_t scalar, int8_t shift, int16_t __restrict dst, uint32_t dstStride)
	Multiplies a scalar value to every element of a Matrix.
FASTCV_API fcvStatus	fcvMinMaxLocu8 (const uint8_t __restrict src, uint32_t srcWidth, uint32_t srcHeight, uint32_t srcStride, uint8_t __restrict minVal, uint8_t __restrict maxVal, uint32_t __restrict minLocX, uint32_t __restrict minLocY, uint32_t __restrict maxLocX, uint32_t *__restrict maxLocY)
	Finds the minimum and maximum values, and their location in a matrix.
FASTCV_API fcvStatus	fcvMinMaxLocu16 (const uint16_t __restrict src, uint32_t srcWidth, uint32_t srcHeight, uint32_t srcStride, uint16_t __restrict minVal, uint16_t __restrict maxVal, uint32_t __restrict minLocX, uint32_t __restrict minLocY, uint32_t __restrict maxLocX, uint32_t *__restrict maxLocY)
	Finds the minimum and maximum values, and their location in a matrix.
FASTCV_API fcvStatus	fcvMinMaxLocs16 (const int16_t __restrict src, uint32_t srcWidth, uint32_t srcHeight, uint32_t srcStride, int16_t __restrict minVal, int16_t __restrict maxVal, uint32_t __restrict minLocX, uint32_t __restrict minLocY, uint32_t __restrict maxLocX, uint32_t *__restrict maxLocY)
	Finds the minimum and maximum values, and their location in a matrix.
FASTCV_API fcvStatus	fcvMinMaxLocu32 (const uint32_t __restrict src, uint32_t srcWidth, uint32_t srcHeight, uint32_t srcStride, uint32_t __restrict minVal, uint32_t __restrict maxVal, uint32_t __restrict minLocX, uint32_t __restrict minLocY, uint32_t __restrict maxLocX, uint32_t *__restrict maxLocY)
	Finds the minimum and maximum values, and their location in a matrix.
FASTCV_API fcvStatus	fcvMinMaxLocs32 (const int32_t __restrict src, uint32_t srcWidth, uint32_t srcHeight, uint32_t srcStride, int32_t __restrict minVal, int32_t __restrict maxVal, uint32_t __restrict minLocX, uint32_t __restrict minLocY, uint32_t __restrict maxLocX, uint32_t *__restrict maxLocY)
	Finds the minimum and maximum values, and their location in a matrix.
FASTCV_API fcvStatus	fcvMinMaxLocf32 (const float32_t __restrict src, uint32_t srcWidth, uint32_t srcHeight, uint32_t srcStride, float32_t __restrict minVal, float32_t __restrict maxVal, uint32_t __restrict minLocX, uint32_t __restrict minLocY, uint32_t __restrict maxLocX, uint32_t *__restrict maxLocY)
	Finds the minimum and maximum values, and their location in a matrix.
FASTCV_API fcvStatus	fcvMinMaxLocf32_v2 (const float32_t __restrict src, uint32_t srcWidth, uint32_t srcHeight, uint32_t srcStride, float32_t __restrict minVal, float32_t __restrict maxVal, uint32_t __restrict minLocX, uint32_t __restrict minLocY, uint32_t __restrict maxLocX, uint32_t __restrict maxLocY, uint32_t __restrict minCount, uint32_t *__restrict maxCount, uint32_t nMinLocSize, uint32_t nMaxLocSize)
	Finds the minimum and maximum values, and their locations in a matrix.
FASTCV_API fcvStatus	fcvMinMaxLocu8_v2 (const uint8_t __restrict src, uint32_t srcWidth, uint32_t srcHeight, uint32_t srcStride, uint8_t __restrict minVal, uint8_t __restrict maxVal, uint32_t __restrict minLocX, uint32_t __restrict minLocY, uint32_t __restrict maxLocX, uint32_t __restrict maxLocY, uint32_t __restrict minCount, uint32_t *__restrict maxCount, uint32_t nMinLocSize, uint32_t nMaxLocSize)
	Finds the minimum and maximum values, and their locations in a matrix.
FASTCV_API fcvStatus	fcvMinMaxLocu16_v2 (const uint16_t __restrict src, uint32_t srcWidth, uint32_t srcHeight, uint32_t srcStride, uint16_t __restrict minVal, uint16_t __restrict maxVal, uint32_t __restrict minLocX, uint32_t __restrict minLocY, uint32_t __restrict maxLocX, uint32_t __restrict maxLocY, uint32_t __restrict minCount, uint32_t *__restrict maxCount, uint32_t nMinLocSize, uint32_t nMaxLocSize)
	Finds the minimum and maximum values, and their locations in a matrix.
FASTCV_API fcvStatus	fcvMinMaxLocs16_v2 (const int16_t __restrict src, uint32_t srcWidth, uint32_t srcHeight, uint32_t srcStride, int16_t __restrict minVal, int16_t __restrict maxVal, uint32_t __restrict minLocX, uint32_t __restrict minLocY, uint32_t __restrict maxLocX, uint32_t __restrict maxLocY, uint32_t __restrict minCount, uint32_t *__restrict maxCount, uint32_t nMinLocSize, uint32_t nMaxLocSize)
	Finds the minimum and maximum values, and their locations in a matrix.
FASTCV_API fcvStatus	fcvMinMaxLocu32_v2 (const uint32_t __restrict src, uint32_t srcWidth, uint32_t srcHeight, uint32_t srcStride, uint32_t __restrict minVal, uint32_t __restrict maxVal, uint32_t __restrict minLocX, uint32_t __restrict minLocY, uint32_t __restrict maxLocX, uint32_t __restrict maxLocY, uint32_t __restrict minCount, uint32_t *__restrict maxCount, uint32_t nMinLocSize, uint32_t nMaxLocSize)
	Finds the minimum and maximum values, and their locations in a matrix.
FASTCV_API fcvStatus	fcvMinMaxLocs32_v2 (const int32_t __restrict src, uint32_t srcWidth, uint32_t srcHeight, uint32_t srcStride, int32_t __restrict minVal, int32_t __restrict maxVal, uint32_t __restrict minLocX, uint32_t __restrict minLocY, uint32_t __restrict maxLocX, uint32_t __restrict maxLocY, uint32_t __restrict minCount, uint32_t *__restrict maxCount, uint32_t nMinLocSize, uint32_t nMaxLocSize)
	Finds the minimum and maximum values, and their locations in a matrix.
FASTCV_API fcvStatus	fcvTransposeRGB888u8 (const uint8_t __restrict src, uint32_t srcWidth, uint32_t srcHeight, uint32_t srcStride, uint8_t __restrict dst, uint32_t dstStride)
	Transposes an interleaved RGB image.
FASTCV_API fcvStatus	fcvCrossProduct3x1f32 (const float32_t __restrict a, const float32_t __restrict b, float32_t *__restrict c, uint32_t N)
	Computes the cross-product of N pairs of 3x1 vectors.
FASTCV_API fcvStatus	fcvSolveLUf32 (float32_t __restrict A, float32_t __restrict b, uint32_t N, uint8_t __restrict pivot, float32_t __restrict x)
	Solves a Linear System of Equations using LU-Decomposition.
FASTCV_API fcvStatus	fcvBitwiseAndu8 (const uint8_t *src1, uint32_t width, uint32_t height, uint32_t src1Stride, const uint8_t *__restrict src2, uint32_t src2Stride, uint8_t *dst, uint32_t dstStride)
	Bitwise AND operation for each element of two uint8_t matrices.
FASTCV_API fcvStatus	fcvBitwiseXoru8 (const uint8_t *src1, uint32_t width, uint32_t height, uint32_t src1Stride, const uint8_t *__restrict src2, uint32_t src2Stride, uint8_t *dst, uint32_t dstStride)
	Bitwise Exclusive Or operation for each element of two uint8_t matrices.
FASTCV_API fcvStatus	fcvBitwiseNotu8 (const uint8_t *src, uint32_t width, uint32_t height, uint32_t srcStride, uint8_t *dst, uint32_t dstStride)
	Bitwise Not operation for each element of a uint8_t matrix.
FASTCV_API fcvStatus	fcvAddu8 (const uint8_t *src1, uint32_t width, uint32_t height, uint32_t src1Stride, const uint8_t *__restrict src2, uint32_t src2Stride, fcvConvertPolicy policy, uint8_t *dst, uint32_t dstStride)
	Matrix addition of two uint8_t type matrices to one uint8_t matrix.
FASTCV_API fcvStatus	fcvAdds16_v2 (const int16_t *src1, uint32_t width, uint32_t height, uint32_t src1Stride, const int16_t *__restrict src2, uint32_t src2Stride, fcvConvertPolicy policy, int16_t *dst, uint32_t dstStride)
	Matrix addition of two int16_t type matrices which allows in-place operation.
FASTCV_API fcvStatus	fcvAddu16u8u16 (const uint16_t *src1, uint32_t width, uint32_t height, uint32_t src1Stride, const uint8_t *__restrict src2, uint32_t src2Stride, fcvConvertPolicy policy, uint16_t *dst, uint32_t dstStride)
	Matrix addition of one uint16_t type matrix and one uint8_t matrix.
FASTCV_API fcvStatus	fcvSubtractu8 (const uint8_t *src1, uint32_t width, uint32_t height, uint32_t src1Stride, const uint8_t *__restrict src2, uint32_t src2Stride, fcvConvertPolicy policy, uint8_t *dst, uint32_t dstStride)
	Matrix subtraction of two uint8_t type matrices.
FASTCV_API fcvStatus	fcvSubtracts16 (const int16_t *src1, uint32_t width, uint32_t height, uint32_t src1Stride, const int16_t *__restrict src2, uint32_t src2Stride, fcvConvertPolicy policy, int16_t *dst, uint32_t dstStride)
	Matrix subtraction of two int16_t type matrices.
FASTCV_API fcvStatus	fcvSubtractu8s16 (const uint8_t __restrict src1, uint32_t width, uint32_t height, uint32_t src1Stride, const uint8_t __restrict src2, uint32_t src2Stride, int16_t *__restrict dst, uint32_t dstStride)
	Matrix subtraction of two uint8_t type matrices.
FASTCV_API fcvStatus	fcvElementMultiplys16 (const int16_t *src1, uint32_t width, uint32_t height, uint32_t src1Stride, const int16_t *src2, uint32_t src2Stride, int8_t scaleFactor, fcvConvertPolicy policy, int16_t *__restrict dst, uint32_t dstStride)
	Element-wise multiplication of two int16_t type matrices.
FASTCV_API fcvStatus	fcvElementMultiplyu8s16 (const uint8_t *src1, uint32_t width, uint32_t height, uint32_t src1Stride, const uint8_t *src2, uint32_t src2Stride, int8_t scaleFactor, fcvConvertPolicy policy, int16_t *__restrict dst, uint32_t dstStride)
	Element-wise multiplication of two uint8_t type matrices.
FASTCV_API fcvStatus	fcvElementMultiplyu8 (const uint8_t *src1, uint32_t width, uint32_t height, uint32_t src1Stride, const uint8_t *src2, uint32_t src2Stride, int8_t scaleFactor, fcvConvertPolicy policy, uint8_t *__restrict dst, uint32_t dstStride)
	Element-wise multiplication of two uint8_t type matrices.
FASTCV_API fcvStatus	fcvAddWeightedu8 (const uint8_t *src1, uint32_t srcWidth, uint32_t srcHeight, uint32_t src1Stride, const uint8_t *__restrict src2, uint32_t src2Stride, float32_t alpha, float32_t beta, uint8_t *dst, uint32_t dstStride)
	Calculate the weighted sum of two uint8_t type matrices.
FASTCV_API fcvStatus	fcvAddSquaredu8u16 (const uint8_t *__restrict src1, uint32_t width, uint32_t height, uint32_t src1Stride, const uint16_t *src2, uint32_t src2Stride, int8_t scaleFactor, uint16_t *dst, uint32_t dstStride)
	Adds the squared values of a uint8_t type matrix to a uint16_t type matrix.

Detailed Description

Commonly used vector & math functions

Function Documentation

FASTCV_API void fcvAbsDifff32	(	const float32_t *__restrict	src1,
		const float32_t *__restrict	src2,
		uint32_t	srcWidth,
		uint32_t	srcHeight,
		uint32_t	srcStride,
		float32_t *__restrict	dst,
		uint32_t	dstStride
	)

Computes the per-element absolute difference between two float32_t matrices.

Parameters:

src1	First input matrix
src2	Second input matrix which has the same width and length as src1
srcWidth	Input matrix width
srcHeight	Input matrix height
srcStride	Stride for the input matrix, i.e. the gap (in terms of bytes) between the first element of a row and that of the successive row
dst	Output matrix which has the same width and length as src1
dstStride	Stride for output image, i.e. the gap (in terms of bytes) between the first element of a row and that of the successive row

Returns:: No return value

FASTCV_API void fcvAbsDiffs32	(	const int32_t *__restrict	src1,
		const int32_t *__restrict	src2,
		uint32_t	srcWidth,
		uint32_t	srcHeight,
		uint32_t	srcStride,
		int32_t *__restrict	dst,
		uint32_t	dstStride
	)

Computes the per-element absolute difference between two int32_t matrices.

Parameters:

src1	The first input matrix
src2	Second input matrix which has the same width and length as src1
srcWidth	Input matrix width
srcHeight	Input matrix height
srcStride	Stride for the input matrix, i.e. the gap (in terms of bytes) between the first element of a row and that of the successive row
dst	Output matrix which has the same width and length as src1
dstStride	Stride for output image, i.e. the gap (in terms of bytes) between the first element of a row and that of the successive row

Returns:: No return value

FASTCV_API void fcvAbsDiffu8	(	const uint8_t *__restrict	src1,
		const uint8_t *__restrict	src2,
		uint32_t	srcWidth,
		uint32_t	srcHeight,
		uint32_t	srcStride,
		uint8_t *__restrict	dst,
		uint32_t	dstStride
	)

Computes the per-element absolute difference between two uint8_t matrices.

Parameters:

src1	The first input matrix
src2	Second input matrix which has the same width and length as src1
srcWidth	Input matrix width
srcHeight	Input matrix height
srcStride	Stride for the input matrix, i.e. the gap (in terms of bytes) between the first element of a row and that of the successive row
dst	Output matrix which has the same width and length as src1
dstStride	Stride for output image, i.e. the gap (in terms of bytes) between the first element of a row and that of the successive row

Returns:: No return value

FASTCV_API void fcvAbsDiffVc3f32	(	const float32_t *__restrict	src,
		float32_t	value1,
		float32_t	value2,
		float32_t	value3,
		uint32_t	srcWidth,
		uint32_t	srcHeight,
		uint32_t	srcStride,
		float32_t *__restrict	dst,
		uint32_t	dstStride
	)

Computes the per-element absolute difference between one 3-channel matrix and a 3-element Scalar.

Parameters:

src	Input matrix
value1	First value of the Scalar
value2	Second value of the Scalar
value3	Third value of the Scalar
srcWidth	Input matrix width
srcHeight	Input matrix height
srcStride	Stride for the input matrix, i.e. the gap (in terms of bytes) between the first element of a row and that of the successive row
dst	Output matrix which has the same width, length and channel number as src
dstStride	Stride for output image, i.e. the gap (in terms of bytes) between the first element of a row and that of the successive row

Returns:: No return value

FASTCV_API void fcvAbsDiffVc3s32	(	const int32_t *__restrict	src,
		int32_t	value1,
		int32_t	value2,
		int32_t	value3,
		uint32_t	srcWidth,
		uint32_t	srcHeight,
		uint32_t	srcStride,
		int32_t *__restrict	dst,
		uint32_t	dstStride
	)

Computes the per-element absolute difference between one 3-channel matrix and a 3-element Scalar.

Parameters:

src	Input matrix
value1	First value of the Scalar
value2	Second value of the Scalar
value3	Third value of the Scalar
srcWidth	Input matrix width
srcHeight	Input matrix height
srcStride	Stride for the input matrix, i.e. the gap (in terms of bytes) between the first element of a row and that of the successive row
dst	Output matrix which has the same width, length and channel number as src
dstStride	Stride for output image, i.e. the gap (in terms of bytes) between the first element of a row and that of the successive row

Returns:: No return value

FASTCV_API void fcvAbsDiffVc3u8	(	const uint8_t *__restrict	src,
		uint8_t	value1,
		uint8_t	value2,
		uint8_t	value3,
		uint32_t	srcWidth,
		uint32_t	srcHeight,
		uint32_t	srcStride,
		uint8_t *__restrict	dst,
		uint32_t	dstStride
	)

Computes the per-element absolute difference between one 3-channel matrix and a 3-element Scalar.

Parameters:

src	Input matrix
value1	First value of the Scalar
value2	Second value of the Scalar
value3	Third value of the Scalar
srcWidth	Input matrix width
srcHeight	Input matrix height
srcStride	Stride for the input matrix, i.e. the gap (in terms of bytes) between the first element of a row and that of the successive row
dst	Output matrix which has the same width, length and channel number as src
dstStride	Stride for output image, i.e. the gap (in terms of bytes) between the first element of a row and that of the successive row

Returns:: No return value

FASTCV_API void fcvAbsDiffVc4f32	(	const float32_t *__restrict	src,
		float32_t	value1,
		float32_t	value2,
		float32_t	value3,
		float32_t	value4,
		uint32_t	srcWidth,
		uint32_t	srcHeight,
		uint32_t	srcStride,
		float32_t *__restrict	dst,
		uint32_t	dstStride
	)

Computes the per-element absolute difference between one 4-channel matrix and a 4-element Scalar.

Parameters:

src	Input matrix
value1	First value of the Scalar
value2	Second value of the Scalar
value3	Third value of the Scalar
value4	Fourth value of the Scalar
srcWidth	Input matrix width
srcHeight	Input matrix height
srcStride	Stride for the input matrix, i.e. the gap (in terms of bytes) between the first element of a row and that of the successive row
dst	Output matrix which has the same width, length and channel number as src
dstStride	Stride for output image, i.e. the gap (in terms of bytes) between the first element of a row and that of the successive row

Returns:: No return value

FASTCV_API void fcvAbsDiffVc4s32	(	const int32_t *__restrict	src,
		int32_t	value1,
		int32_t	value2,
		int32_t	value3,
		int32_t	value4,
		uint32_t	srcWidth,
		uint32_t	srcHeight,
		uint32_t	srcStride,
		int32_t *__restrict	dst,
		uint32_t	dstStride
	)

Computes the per-element absolute difference between one 4-channel matrix and a 4-element Scalar.

Parameters:

src	Input matrix
value1	First value of the Scalar
value2	Second value of the Scalar
value3	Third value of the Scalar
value4	Fourth value of the Scalar
srcWidth	Input matrix width
srcHeight	Input matrix height
srcStride	Stride for the input matrix, i.e. the gap (in terms of bytes) between the first element of a row and that of the successive row
dst	Output matrix which has the same width, length and channel number as src
dstStride	Stride for output image, i.e. the gap (in terms of bytes) between the first element of a row and that of the successive row

Returns:: No return value

FASTCV_API void fcvAbsDiffVc4u8	(	const uint8_t *__restrict	src,
		uint8_t	value1,
		uint8_t	value2,
		uint8_t	value3,
		uint8_t	value4,
		uint32_t	srcWidth,
		uint32_t	srcHeight,
		uint32_t	srcStride,
		uint8_t *__restrict	dst,
		uint32_t	dstStride
	)

Computes the per-element absolute difference between one 4-channel matrix and a 4-element Scalar.

Parameters:

src	Input matrix
value1	First value of the Scalar
value2	Second value of the Scalar
value3	Third value of the Scalar
value4	Fourth value of the Scalar
srcWidth	Input matrix width
srcHeight	Input matrix height
srcStride	Stride for the input matrix, i.e. the gap (in terms of bytes) between the first element of a row and that of the successive row
dst	Output matrix which has the same width, length and channel number as src
dstStride	Stride for output image, i.e. the gap (in terms of bytes) between the first element of a row and that of the successive row

Returns:: No return value

FASTCV_API void fcvAbsDiffVf32	(	const float32_t *__restrict	src,
		float32_t	value,
		uint32_t	srcWidth,
		uint32_t	srcHeight,
		uint32_t	srcStride,
		float32_t *__restrict	dst,
		uint32_t	dstStride
	)

Computes the per-element absolute difference between one matrix and one value.

Parameters:

src	Input matrix
value	Input value
srcWidth	Input matrix width
srcHeight	Input matrix height
srcStride	Stride for the input matrix, i.e. the gap (in terms of bytes) between the first element of a row and that of the successive row
dst	Output matrix which has the same width and length as src
dstStride	Stride for output image, i.e. the gap (in terms of bytes) between the first element of a row and that of the successive row

Returns:: No return value

FASTCV_API void fcvAbsDiffVs32	(	const int32_t *__restrict	src,
		int32_t	value,
		uint32_t	srcWidth,
		uint32_t	srcHeight,
		uint32_t	srcStride,
		int32_t *__restrict	dst,
		uint32_t	dstStride
	)

Computes the per-element absolute difference between one matrix and one value.

Parameters:

src	Input matrix
value	Input value
srcWidth	Input matrix width
srcHeight	Input matrix height
srcStride	Stride for the input matrix, i.e. the gap (in terms of bytes) between the first element of a row and that of the successive row
dst	Output matrix which has the same width and length as src
dstStride	Stride for output image, i.e. the gap (in terms of bytes) between the first element of a row and that of the successive row

Returns:: No return value

FASTCV_API void fcvAbsDiffVu8	(	const uint8_t *__restrict	src,
		uint8_t	value,
		uint32_t	srcWidth,
		uint32_t	srcHeight,
		uint32_t	srcStride,
		uint8_t *__restrict	dst,
		uint32_t	dstStride
	)

Computes the per-element absolute difference between one matrix and one value.

Parameters:

src	Input matrix
value	Input value
srcWidth	Input matrix width
srcHeight	Input matrix height
srcStride	Stride for the input matrix, i.e. the gap (in terms of bytes) between the first element of a row and that of the successive row
dst	Output matrix which has the same width and length as src
dstStride	Stride for output image, i.e. the gap (in terms of bytes) between the first element of a row and that of the successive row

Returns:: No return value

FASTCV_API void fcvAddf32	(	const float32_t *__restrict	src1,
		uint32_t	width,
		uint32_t	height,
		uint32_t	src1Stride,
		const float32_t *__restrict	src2,
		uint32_t	src2Stride,
		float32_t *__restrict	dst,
		uint32_t	dstStride
	)

Matrix addition of two float32_t type matrices.

Parameters:

src1	First source matrix. NOTE: array should be 128-bit aligned
width	Width of the source matrix.
height	Height of the source matrix.
src1Stride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 src1Stride is default to width*4. WARNING: should be multiple of 8
src2	Second source matrix. NOTE: array should be 128-bit aligned
src2Stride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 src2Stride is default to width*4. WARNING: should be multiple of 8
dst	the result matrix (float32_t type). NOTE: array should be 128-bit aligned
dstStride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 dstStride is default to width*4. WARNING: should be multiple of 8

FASTCV_API void fcvAdds16	(	const int16_t *__restrict	src1,
		uint32_t	width,
		uint32_t	height,
		uint32_t	src1Stride,
		const int16_t *__restrict	src2,
		uint32_t	src2Stride,
		int16_t *__restrict	dst,
		uint32_t	dstStride
	)

Matrix addition of two int16_t type matrices with saturation.

Parameters:

src1	First source matrix. NOTE: array should be 128-bit aligned
width	Width of the source matrix.
height	Height of the source matrix.
src1Stride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 src1Stride is default to width*2. WARNING: should be multiple of 8
src2	Second source matrix. NOTE: array should be 128-bit aligned
src2Stride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 src2Stride is default to width*2. WARNING: should be multiple of 8
dst	the result matrix (int16_t type). The result will be saturated to int16_t. NOTE: array should be 128-bit aligned
dstStride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 dstStride is default to width*2. WARNING: should be multiple of 8

FASTCV_API fcvStatus fcvAdds16_v2	(	const int16_t *	src1,
		uint32_t	width,
		uint32_t	height,
		uint32_t	src1Stride,
		const int16_t *__restrict	src2,
		uint32_t	src2Stride,
		fcvConvertPolicy	policy,
		int16_t *	dst,
		uint32_t	dstStride
	)

Matrix addition of two int16_t type matrices which allows in-place operation.

Parameters:

src1	First source matrix. NOTE: array should be 128-bit aligned
width	Width of the source matrix.
height	Height of the source matrix.
src1Stride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 src1Stride is default to width. WARNING: should be multiple of 8
src2	Second source matrix. NOTE: array should be 128-bit aligned
src2Stride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 src2Stride is default to width. WARNING: should be multiple of 8
policy	Conversion policy that decides how data overflow should be handled
dst	the result matrix (int16_t type). NOTE: array should be 128-bit aligned
dstStride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 dstStride is default to width*2. WARNING: should be multiple of 8

Returns:: FASTCV_SUCCESS upon success. Other status codes upon failure.

FASTCV_API fcvStatus fcvAddScalarf32	(	const float32_t *__restrict	src,
		uint32_t	srcWidth,
		uint32_t	srcHeight,
		uint32_t	srcStride,
		float32_t	scalar,
		float32_t *__restrict	dst,
		uint32_t	dstStride
	)

Adds a scalar value to every element of a Matrix.

Adds a floating point scalar value to each element of the source Matrix and stores the result of the addition in the corresponding element of the destination matrix.

Parameters:

src	Input floating point matrix. Size of buffer is srcStride*srcHeight bytes.
srcWidth	Width of the Matrix.
srcHeight	Height of the Matrix.
srcStride	Stride of the Matrix in bytes. NOTE: if 0, srcStride is set as srcWidth. WARNING: should be multiple of 8, and at least as much as srcWidth if not 0.
scalar	The floating point scalar to be added to the source matrix.
dst	Output 32-bit floating point matrix. Size of buffer is dstStride*srcHeight bytes.
dstStride	Stride of the output matrix in bytes. NOTE: if 0, dstStride is set as srcWidth. WARNING: should be multiple of 8, and at least as much as srcWidth if not 0.

Returns:: FASTCV_SUCCESS upon success. Other status codes upon failure.

FASTCV_API fcvStatus fcvAddScalars16	(	const int16_t *__restrict	src,
		uint32_t	srcWidth,
		uint32_t	srcHeight,
		uint32_t	srcStride,
		int16_t	scalar,
		int16_t *__restrict	dst,
		uint32_t	dstStride
	)

Adds a scalar value to every element of a Matrix.

Adds a signed 16-bit integer scalar value to each element of the source Matrix and stores the result of the addition in the corresponding element of the destination matrix.
NOTE : If the sum of the scalar and matrix element exceeds the maximum value of a signed 16-bit integer, the result is clipped to this maximum value, while if the sum goes below the minimum value of a signed 16-bit integer, it is clipped to this minimum value.

Parameters:

src	Input signed 16-bit integer matrix. Size of buffer is srcStride*srcHeight bytes. WARNING: should be 128-bit aligned.
srcWidth	Width of the Matrix.
srcHeight	Height of the Matrix.
srcStride	Stride of the Matrix in bytes. NOTE: if 0, srcStride is set as srcWidth. WARNING: should be multiple of 8, and at least as much as srcWidth if not 0.
scalar	The signed 16-bit integer scalar to be added to the source matrix.
dst	Output 16-bit signed integer matrix. Size of buffer is dstStride*srcHeight bytes. WARNING: should be 128-bit aligned.
dstStride	Stride of the output matrix in bytes. NOTE: if 0, dstStride is set as srcWidth. WARNING: should be multiple of 8, and at least as much as srcWidth if not 0.

Returns:: FASTCV_SUCCESS upon success. Other status codes upon failure.

FASTCV_API fcvStatus fcvAddSquaredu8u16	(	const uint8_t *__restrict	src1,
		uint32_t	width,
		uint32_t	height,
		uint32_t	src1Stride,
		const uint16_t *	src2,
		uint32_t	src2Stride,
		int8_t	scaleFactor,
		uint16_t *	dst,
		uint32_t	dstStride
	)

Adds the squared values of a uint8_t type matrix to a uint16_t type matrix.

Parameters:

src1	First source matrix. NOTE: array should be 128-bit aligned
width	Width of the source matrix.
height	Height of the source matrix.
src1Stride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 src1Stride is default to width. WARNING: should be multiple of 8
src2	Second source matrix. NOTE: array should be 128-bit aligned
src2Stride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 src2Stride is default to width. WARNING: should be multiple of 8
scaleFactor	The number of bits to be shifted to scale the result. When scaleFactor > 0, results are right-shifted; when scaleFactor < 0, results are left-shifted by absolute value of scaleFactor
dst	the result matrix (uint16_t type). NOTE: array should be 128-bit aligned
dstStride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 dstStride is default to width*2. WARNING: should be multiple of 8

Returns:: FASTCV_SUCCESS upon success. Other status codes upon failure.

FASTCV_API fcvStatus fcvAddu16u8u16	(	const uint16_t *	src1,
		uint32_t	width,
		uint32_t	height,
		uint32_t	src1Stride,
		const uint8_t *__restrict	src2,
		uint32_t	src2Stride,
		fcvConvertPolicy	policy,
		uint16_t *	dst,
		uint32_t	dstStride
	)

Matrix addition of one uint16_t type matrix and one uint8_t matrix.

Parameters:

src1	First source matrix. NOTE: array should be 128-bit aligned
width	Width of the source matrix.
height	Height of the source matrix.
src1Stride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 src1Stride is default to width. WARNING: should be multiple of 8
src2	Second source matrix. NOTE: array should be 128-bit aligned
src2Stride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 src2Stride is default to width. WARNING: should be multiple of 8
policy	Conversion policy that decides how data overflow should be handled
dst	the result matrix (uint16_t type). NOTE: array should be 128-bit aligned
dstStride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 dstStride is default to width*2. WARNING: should be multiple of 8

Returns:: FASTCV_SUCCESS upon success. Other status codes upon failure.

FASTCV_API fcvStatus fcvAddu8	(	const uint8_t *	src1,
		uint32_t	width,
		uint32_t	height,
		uint32_t	src1Stride,
		const uint8_t *__restrict	src2,
		uint32_t	src2Stride,
		fcvConvertPolicy	policy,
		uint8_t *	dst,
		uint32_t	dstStride
	)

Matrix addition of two uint8_t type matrices to one uint8_t matrix.

Parameters:

src1	First source matrix. NOTE: array should be 128-bit aligned
width	Width of the source matrix.
height	Height of the source matrix.
src1Stride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 src1Stride is default to width. WARNING: should be multiple of 8
src2	Second source matrix. NOTE: array should be 128-bit aligned
src2Stride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 src2Stride is default to width. WARNING: should be multiple of 8
policy	Conversion policy that decides how data overflow should be handled
dst	the result matrix (uint8_t type). NOTE: array should be 128-bit aligned
dstStride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 dstStride is default to width*2. WARNING: should be multiple of 8

Returns:: FASTCV_SUCCESS upon success. Other status codes upon failure.

FASTCV_API void fcvAddu8u16	(	const uint8_t *__restrict	src1,
		uint32_t	width,
		uint32_t	height,
		uint32_t	src1Stride,
		const uint8_t *__restrict	src2,
		uint32_t	src2Stride,
		uint16_t *__restrict	dst,
		uint32_t	dstStride
	)

Matrix addition of two uint8_t type matrices.

Parameters:

src1	First source matrix. NOTE: array should be 128-bit aligned
width	Width of the source matrix.
height	Height of the source matrix.
src1Stride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 src1Stride is default to width. WARNING: should be multiple of 8
src2	Second source matrix. NOTE: array should be 128-bit aligned
src2Stride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 src2Stride is default to width. WARNING: should be multiple of 8
dst	the result matrix (uint16_t type). NOTE: array should be 128-bit aligned
dstStride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 dstStride is default to width*2. WARNING: should be multiple of 8

FASTCV_API fcvStatus fcvAddWeightedu8	(	const uint8_t *	src1,
		uint32_t	srcWidth,
		uint32_t	srcHeight,
		uint32_t	src1Stride,
		const uint8_t *__restrict	src2,
		uint32_t	src2Stride,
		float32_t	alpha,
		float32_t	beta,
		uint8_t *	dst,
		uint32_t	dstStride
	)

Calculate the weighted sum of two uint8_t type matrices.

Specifically, when 0 <= alpha <= 1 and beta = 1-alpha, the function accumulates a weighted value from an input image to an output image
NOTE: alpha and beta should be within the range of (-128.0, 127.996) and have up to three digits decimal precision

Parameters:

src1	First source matrix. NOTE: array should be 128-bit aligned
srcWidth	Width of the source matrix.
srcHeight	Height of the source matrix.
src1Stride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 src1Stride is default to width. WARNING: should be multiple of 8
src2	Second source matrix. NOTE: array should be 128-bit aligned
src2Stride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 src2Stride is default to width. WARNING: should be multiple of 8
alpha	The weight value applied to src1 matrix, provided by float32_t type
beta	The weight value applied to src2 matrix, provided by float32_t type
dst	the result matrix (uint8_t type). NOTE: array should be 128-bit aligned
dstStride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 dstStride is default to width. WARNING: should be multiple of 8

Returns:: FASTCV_SUCCESS upon success, other values upon failure.

FASTCV_API uint32_t fcvBitCount32x1u8 ( const uint8_t *__restrict src )

Counts "1" bits in supplied 32-byte vector.

Parameters:

src	Pointer to 32-byte vector(s) to count bits that are 1.

Returns:: total number of "1" bits in supplied vector

FASTCV_API void fcvBitCount32x4u8	(	const uint8_t *__restrict	a,
		const uint8_t *__restrict	b,
		const uint8_t *__restrict	c,
		const uint8_t *__restrict	d,
		uint32_t *__restrict	bitCount
	)

Counts bits in supplied 4, 32-byte vectors.

Parameters:

a	Pointer to 32-byte vector to count bits.
b	Pointer to 32-byte vector to count bits.
c	Pointer to 32-byte vector to count bits.
d	Pointer to 32-byte vector to count bits.
bitCount	Array to store the four resultant bit counts.

FASTCV_API uint32_t fcvBitCount64x1u8 ( const uint8_t *__restrict src )

Counts bits in supplied 64-byte vector.

Parameters:

src	Pointer to 64-byte vector(s) to count bits.

Returns:: Bit count.

FASTCV_API void fcvBitCount64x4u8	(	const uint8_t *__restrict	a,
		const uint8_t *__restrict	b,
		const uint8_t *__restrict	c,
		const uint8_t *__restrict	d,
		uint32_t *__restrict	bitCount
	)

Counts bits in supplied 4, 64-byte vectors.

Parameters:

a	Pointer to 64-byte vector to count bits.
b	Pointer to 64-byte vector to count bits.
c	Pointer to 64-byte vector to count bits.
d	Pointer to 64-byte vector to count bits.
bitCount	Array to store the four resultant bit counts.

FASTCV_API uint32_t fcvBitCountu32	(	const uint32_t *__restrict	src,
		unsigned int	srcLength
	)

Counts bits in supplied vector of unsigned integers.

Parameters:

src	Pointer to vector(s) to count bits.
srcLength	Number of elements in vector

Returns:: Bit count.

FASTCV_API uint32_t fcvBitCountu8	(	const uint8_t *__restrict	src,
		unsigned int	srcLength
	)

Counts "1" bits in supplied vector.

Parameters:

src	Pointer to vector to count bits that are 1.
srcLength	Length of the vector to count bits. Assumed that the remainder of bits modulo 8 will be set to 0 a priori.

Returns:: total number of "1" bits in supplied vector

FASTCV_API fcvStatus fcvBitwiseAndu8	(	const uint8_t *	src1,
		uint32_t	width,
		uint32_t	height,
		uint32_t	src1Stride,
		const uint8_t *__restrict	src2,
		uint32_t	src2Stride,
		uint8_t *	dst,
		uint32_t	dstStride
	)

Bitwise AND operation for each element of two uint8_t matrices.

Parameters:

src1	First source matrix. NOTE: array should be 128-bit aligned
width	Width of the source matrix.
height	Height of the source matrix.
src1Stride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 src1Stride is default to width. WARNING: should be multiple of 8
src2	Second source matrix. NOTE: array should be 128-bit aligned
src2Stride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 src2Stride is default to width. WARNING: should be multiple of 8
dst	the result matrix (uint8_t type). NOTE: array should be 128-bit aligned
dstStride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 dstStride is default to width. WARNING: should be multiple of 8

Returns:: FASTCV_SUCCESS upon success. Other status codes upon failure.

FASTCV_API fcvStatus fcvBitwiseNotu8	(	const uint8_t *	src,
		uint32_t	width,
		uint32_t	height,
		uint32_t	srcStride,
		uint8_t *	dst,
		uint32_t	dstStride
	)

Bitwise Not operation for each element of a uint8_t matrix.

Parameters:

src	Source matrix. NOTE: array should be 128-bit aligned
width	Width of the source matrix.
height	Height of the source matrix.
srcStride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 srcStride is default to width. WARNING: should be multiple of 8
src2	Second source matrix. NOTE: array should be 128-bit aligned
src2Stride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 src2Stride is default to width. WARNING: should be multiple of 8
dst	the result matrix (uint8_t type). NOTE: array should be 128-bit aligned
dstStride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 dstStride is default to width. WARNING: should be multiple of 8

Returns:: FASTCV_SUCCESS upon success. Other status codes upon failure.

FASTCV_API fcvStatus fcvBitwiseXoru8	(	const uint8_t *	src1,
		uint32_t	width,
		uint32_t	height,
		uint32_t	src1Stride,
		const uint8_t *__restrict	src2,
		uint32_t	src2Stride,
		uint8_t *	dst,
		uint32_t	dstStride
	)

Bitwise Exclusive Or operation for each element of two uint8_t matrices.

Parameters:

src1	First source matrix. NOTE: array should be 128-bit aligned
width	Width of the source matrix.
height	Height of the source matrix.
src1Stride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 src1Stride is default to width. WARNING: should be multiple of 8
src2	Second source matrix. NOTE: array should be 128-bit aligned
src2Stride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 src2Stride is default to width. WARNING: should be multiple of 8
dst	the result matrix (uint8_t type). NOTE: array should be 128-bit aligned
dstStride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 dstStride is default to width. WARNING: should be multiple of 8

Returns:: FASTCV_SUCCESS upon success. Other status codes upon failure.

FASTCV_API float32_t fcvBlockDotProductf32	(	const float32_t *__restrict	src1,
		uint32_t	blockWidth,
		uint32_t	blockHeight,
		uint32_t	src1Stride,
		const float32_t *__restrict	src2,
		uint32_t	src2Stride
	)

Dot product of two float32_t type blocks.

Parameters:

src1	First source block.
blockWidth	Width of the source block.
blockHeight	Height of the source block.
src1Stride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 src1Stride is default to src1Width*4. WARNING: should be multiple of 8
src2	Second source block.
src2Stride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 src2Stride is default to src2Width*4. WARNING: should be multiple of 8

Returns:: Block dot product (float32_t).

FASTCV_API uint32_t fcvBlockDotProductu8	(	const uint8_t *__restrict	src1,
		uint32_t	blockWidth,
		uint32_t	blockHeight,
		uint32_t	src1Stride,
		const uint8_t *__restrict	src2,
		uint32_t	src2Stride
	)

Dot product of two uint8_t type blocks.

Parameters:

src1	First source block.
blockWidth	Width of the source block.
blockHeight	Height of the source block. NOTE: blockWidth*blockHeight should not be larger than 65536
src1Stride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 src1Stride is default to src1Width. WARNING: should be multiple of 8
src2	Second source block.
src2Stride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 src2Stride is default to src2Width. WARNING: should be multiple of 8

Returns:: Block dot product (uint32_t).

FASTCV_API fcvStatus fcvCrossProduct3x1f32	(	const float32_t *__restrict	a,
		const float32_t *__restrict	b,
		float32_t *__restrict	c,
		uint32_t	N
	)

Computes the cross-product of N pairs of 3x1 vectors.

Computes the cross-product of N pairs of 3x1 vectors. For each pair of 3x1 vectors, ai and bi, the output vector ci is given by ci = ai x bi

Parameters:

a	Input buffer containing "N" 3x1 Vectors. Must be of length N * 3. The layout of the array is as follows x0 y0 z0 x1 y1 z1 .......
b	Input buffer containing "N" 3x1 Vectors. Must be of length N * 3. The layout of the array is as follows x0 y0 z0 x1 y1 z1 .......
c	Output buffer containing the resultant "N" 3x1 Vectors. Each 3 x 1 Vector in c is computed from the corresponding 3 x 1 pairs in a & b. Must be of length N * 3.
N	Number of vector pairs. For example if N = 10, then a, b and c will have to be 10 x 3 in length

Returns:: FASTCV_SUCCESS upon success. Other status codes upon failure.

FASTCV_API void fcvDotProduct11x12u8	(	const uint8_t *__restrict	patchPixels,
		const uint8_t *__restrict	imagePixels,
		unsigned short	imgW,
		unsigned short	imgH,
		int	iX,
		int	iY,
		int32_t *__restrict	dotProducts
	)

Dot product of 1 patch (8x8 byte square) with 8x8 squares in 11x12 rectangle around the center search pixel (iX,iY).

Parameters:

patchPixels	Pointer to 8-bit patch pixel values linearly laid out in memory.
imagePixels	Pointer to 8-bit image pixel values linearly laid out in memory.
imgW	Width in pixels of the image.
imgH	Height in pixels of the image.
iX	X location on image of the center of the search window.
iY	Y location on image of the center of the search window.
dotProducts	Output 11x12 dot product values. WARNING: array should be 128-bit aligned

FASTCV_API int32_t fcvDotProduct128x1s8	(	const int8_t *__restrict	a,
		const int8_t *__restrict	b
	)

Dot product of two 128-byte vectors.

Parameters:

a	Vector. NOTE: array should be 128-bit aligned
b	Vector. NOTE: array should be 128-bit aligned

Returns:: Dot product <a|b>.

FASTCV_API uint32_t fcvDotProduct128x1u8	(	const uint8_t *__restrict	a,
		const uint8_t *__restrict	b
	)

Dot product of two 128-byte vectors.

Parameters:

a	Vector. NOTE: array should be 128-bit aligned
b	Vector. NOTE: array should be 128-bit aligned

Returns:: Dot product <a|b>.

FASTCV_API void fcvDotProduct128x4s8	(	const int8_t *__restrict	a,
		const int8_t *__restrict	b,
		const int8_t *__restrict	c,
		const int8_t *__restrict	d,
		const int8_t *__restrict	e,
		int32_t *__restrict	dotProducts
	)

Dot product of one 128-byte vector against 4 others.

Dot product of vector (a) against 4 others (b,c,d,e):
<a|b>, <a|c>, <a|d>, <a|e>

Parameters:

a	Vector. NOTE: array should be 128-bit aligned
b	Vector. NOTE: array should be 128-bit aligned
c	Vector. NOTE: array should be 128-bit aligned
d	Vector. NOTE: array should be 128-bit aligned
e	Vector. NOTE: array should be 128-bit aligned
dotProducts	Output of the 4 results { <a|b>, <a|c>, <a|d>, <a|e> }. WARNING: array should be 128-bit aligned

FASTCV_API void fcvDotProduct128x4u8	(	const uint8_t *__restrict	a,
		const uint8_t *__restrict	b,
		const uint8_t *__restrict	c,
		const uint8_t *__restrict	d,
		const uint8_t *__restrict	e,
		uint32_t *__restrict	dotProducts
	)

Dot product of one 128-byte vector against 4 others.

Dot product of vector (a) against 4 others (b,c,d,e):
<a|b>, <a|c>, <a|d>, <a|e>

Parameters:

a	Vector. NOTE: array should be 128-bit aligned
b	Vector. NOTE: array should be 128-bit aligned
c	Vector. NOTE: array should be 128-bit aligned
d	Vector. NOTE: array should be 128-bit aligned
e	Vector. NOTE: array should be 128-bit aligned
dotProducts	Output of the 4 results { <a|b>, <a|c>, <a|d>, <a|e> }. WARNING: array should be 128-bit aligned

FASTCV_API int32_t fcvDotProduct36x1s8	(	const int8_t *__restrict	a,
		const int8_t *__restrict	b
	)

Dot product of two 36-byte vectors.

Parameters:

a	Vector.
b	Vector.

Returns:: Dot product <a|b>.

FASTCV_API uint32_t fcvDotProduct36x1u8	(	const uint8_t *__restrict	a,
		const uint8_t *__restrict	b
	)

Dot product of two 36-byte vectors.

Parameters:

a	Vector.
b	Vector.

Returns:: Dot product <a|b>.

FASTCV_API void fcvDotProduct36x4s8	(	const int8_t *__restrict	a,
		const int8_t *__restrict	b,
		const int8_t *__restrict	c,
		const int8_t *__restrict	d,
		const int8_t *__restrict	e,
		int32_t *__restrict	dotProducts
	)

Dot product of one 36-byte vector against 4 others.

Dot product of 36-byte vector (a) against 4 others (b,c,d,e):
<a|b>, <a|c>, <a|d>, <a|e>

Parameters:

a	Vector.
b	Vector.
c	Vector.
d	Vector.
e	Vector.
dotProducts	Output of the 4 results { <a|b>, <a|c>, <a|d>, <a|e> }. WARNING: array should be 128-bit aligned

FASTCV_API void fcvDotProduct36x4u8	(	const uint8_t *__restrict	a,
		const uint8_t *__restrict	b,
		const uint8_t *__restrict	c,
		const uint8_t *__restrict	d,
		const uint8_t *__restrict	e,
		uint32_t *__restrict	dotProducts
	)

Dot product of one 36-byte vector against 4 others.

Dot product of 36-byte vector (a) against 4 others (b,c,d,e):
<a|b>, <a|c>, <a|d>, <a|e>

Parameters:

a	Vector.
b	Vector.
c	Vector.
d	Vector.
e	Vector.
dotProducts	Output of the 4 results { <a|b>, <a|c>, <a|d>, <a|e> }. WARNING: array should be 128-bit aligned

FASTCV_API int32_t fcvDotProduct64x1s8	(	const int8_t *__restrict	a,
		const int8_t *__restrict	b
	)

Dot product of two 64-byte vectors.

Parameters:

a	Vector. NOTE: array should be 128-bit aligned
b	Vector. NOTE: array should be 128-bit aligned

Returns:: Dot product <a|b>.

FASTCV_API uint32_t fcvDotProduct64x1u8	(	const uint8_t *__restrict	a,
		const uint8_t *__restrict	b
	)

Dot product of two 64-byte vectors.

Parameters:

a	Vector.
b	Vector.

Returns:: Dot product <a|b>.

FASTCV_API void fcvDotProduct64x4s8	(	const int8_t *__restrict	a,
		const int8_t *__restrict	b,
		const int8_t *__restrict	c,
		const int8_t *__restrict	d,
		const int8_t *__restrict	e,
		int32_t *__restrict	dotProducts
	)

Dot product of one 64-byte vector against 4 others.

Dot product of vector (a) against 4 others (b,c,d,e):
<a|b>, <a|c>, <a|d>, <a|e>

Parameters:

a	Vector. NOTE: array should be 128-bit aligned
b	Vector. NOTE: array should be 128-bit aligned
c	Vector. NOTE: array should be 128-bit aligned
d	Vector. NOTE: array should be 128-bit aligned
e	Vector. NOTE: array should be 128-bit aligned
dotProducts	Output of the 4 results { <a|b>, <a|c>, <a|d>, <a|e> }. WARNING: array should be 128-bit aligned

FASTCV_API void fcvDotProduct64x4u8	(	const uint8_t *__restrict	a,
		const uint8_t *__restrict	b,
		const uint8_t *__restrict	c,
		const uint8_t *__restrict	d,
		const uint8_t *__restrict	e,
		uint32_t *__restrict	dotProducts
	)

Dot product of one 64-byte vector against 4 others.

Dot product of 64-byte vector (a) against 4 others (b,c,d,e):
<a|b>, <a|c>, <a|d>, <a|e>

Parameters:

a	Vector.
b	Vector.
c	Vector.
d	Vector.
e	Vector.
dotProducts	Output of the 4 results { <a|b>, <a|c>, <a|d>, <a|e> }. WARNING: array should be 128-bit aligned

FASTCV_API void fcvDotProduct8x8u8	(	const uint8_t *__restrict	patchPixels,
		const uint8_t *__restrict	imagePixels,
		unsigned short	imgW,
		unsigned short	imgH,
		int	nX,
		int	nY,
		unsigned int	nNum,
		int32_t *__restrict	dotProducts
	)

Dot product of 1 patch (8x8 byte square) with several (n) 8x8 squares along a line of pixels in an image.

Parameters:

patchPixels	Pointer to 8-bit patch pixel values linearly laid out in memory.
imagePixels	Pointer to 8-bit image pixel values linearly laid out in memory.
imgW	Width in pixels of the source image.
imgH	Height in pixels of the source image.
nX	X location on image of starting search pixel.
nY	Y location on image of starting search pixel.
nNum	Number of pixels (in X direction) on image to sweep.
dotProducts	Output dot product values of nNum pixels. WARNING: array size must be a multiple of 4 (e.g., 4, 8, 12, ...) NOTE: array should be 128-bit aligned

FASTCV_API float32_t fcvDotProductf32	(	const float32_t *__restrict	a,
		const float32_t *__restrict	b,
		uint32_t	N
	)

Executes dot product of two floating point vectors.

Parameters:

a	Pointer to the vector a of size N. NOTE: should be 128-bit aligned.
b	Pointer to the vector b of size N. NOTE: should be 128-bit aligned.
N	Number of elements in each of the vectors a and b.

Returns:: the dot product in float32_t.

FASTCV_API void fcvDotProductNorm128x4s8	(	const int8_t *__restrict	a,
		float	invLengthA,
		const int8_t *__restrict	b0,
		const int8_t *__restrict	b1,
		const int8_t *__restrict	b2,
		const int8_t *__restrict	b3,
		float *__restrict	invLengthsB,
		float *__restrict	dotProducts
	)

Normalized dot product of one 128-byte vector against 4 others.

Dot product of vector (a) against 4 others (b0,b1,b2,b3):
<a|b0>, <a|b1>, <a|b2>, <a|b3> using their given inverse lengths for normalization.

Parameters:

a	Vector. NOTE: array should be 128-bit aligned
invLengthA	Inverse of the length of vector a.
b0	Vector. NOTE: array should be 128-bit aligned
b1	Vector. NOTE: array should be 128-bit aligned
b2	Vector. NOTE: array should be 128-bit aligned
b3	Vector. NOTE: array should be 128-bit aligned
invLengthsB	Pointer to an array of the inverse lengths of the vectors b0, b1, b2 and b3. The pointer must point to 4 floating point values. WARNING: array should be 128-bit aligned
dotProducts	Output of the 4 results { <a|b0>, <a|b1>, <a|b2>, <a|b3> }. WARNING: array should be 128-bit aligned

FASTCV_API void fcvDotProductNorm128x4u8	(	const uint8_t *__restrict	a,
		float	invLengthA,
		const uint8_t *__restrict	b0,
		const uint8_t *__restrict	b1,
		const uint8_t *__restrict	b2,
		const uint8_t *__restrict	b3,
		float *__restrict	invLengthsB,
		float *__restrict	dotProducts
	)

Normalized dot product of one 128-byte vector against 4 others.

Dot product of vector (a) against 4 others (b0,b1,b2,b3):
<a|b0>, <a|b1>, <a|b2>, <a|b3> using their given inverse lengths for normalization.

Parameters:

a	Vector. NOTE: array should be 128-bit aligned
invLengthA	Inverse of the length of vector a.
b0	Vector. NOTE: array should be 128-bit aligned
b1	Vector. NOTE: array should be 128-bit aligned
b2	Vector. NOTE: array should be 128-bit aligned
b3	Vector. NOTE: array should be 128-bit aligned
invLengthsB	Pointer to an array of the inverse lengths of the vectors b0, b1, b2 and b3. The pointer must point to 4 floating point values.
dotProducts	Output of the 4 results { <a|b0>, <a|b1>, <a|b2>, <a|b3> }. WARNING: array should be 128-bit aligned

FASTCV_API void fcvDotProductNorm36x4s8	(	const int8_t *__restrict	a,
		float	invLengthA,
		const int8_t *__restrict	b0,
		const int8_t *__restrict	b1,
		const int8_t *__restrict	b2,
		const int8_t *__restrict	b3,
		float *__restrict	invLengthsB,
		float *__restrict	dotProducts
	)

Normalized dot product of one 36-byte vector against 4 others.

Dot product of 36-byte vector (a) against 4 others (b0,b1,b2,b3):
<a|b0>, <a|b1>, <a|b2>, <a|b3> using their given inverse lengths for normalization.

Parameters:

a	Vector.
invLengthA	Inverse of the length of vector a.
b0	Vector.
b1	Vector.
b2	Vector.
b3	Vector.
invLengthsB	Pointer to an array of the inverse lengths of the vectors b0, b1, b2 and b3. The pointer must point to 4 floating point values. WARNING: array should be 128-bit aligned
dotProducts	Output of the 4 results { <a|b0>, <a|b1>, <a|b2>, <a|b3> }. WARNING: array should be 128-bit aligned

FASTCV_API void fcvDotProductNorm36x4u8	(	const uint8_t *__restrict	a,
		float	invLengthA,
		const uint8_t *__restrict	b0,
		const uint8_t *__restrict	b1,
		const uint8_t *__restrict	b2,
		const uint8_t *__restrict	b3,
		float *__restrict	invLengthsB,
		float *__restrict	dotProducts
	)

Normalized dot product of one 36-byte vector against 4 others.

Dot product of 36-byte vector (a) against 4 others (b0,b1,b2,b3):
<a|b0>, <a|b1>, <a|b2>, <a|b3> using their given inverse lengths for normalization.

Parameters:

a	Vector.
invLengthA	Inverse of the length of vector a.
b0	Vector.
b1	Vector.
b2	Vector.
b3	Vector.
invLengthsB	Pointer to an array of the inverse lengths of the vectors b0, b1, b2 and b3. The pointer must point to 4 floating point values. WARNING: array should be 128-bit aligned
dotProducts	Output of the 4 results { <a|b0>, <a|b1>, <a|b2>, <a|b3> }. WARNING: array should be 128-bit aligned

FASTCV_API void fcvDotProductNorm64x4s8	(	const int8_t *__restrict	a,
		float	invLengthA,
		const int8_t *__restrict	b0,
		const int8_t *__restrict	b1,
		const int8_t *__restrict	b2,
		const int8_t *__restrict	b3,
		float *__restrict	invLengthsB,
		float *__restrict	dotProducts
	)

Normalized dot product of one 64-byte vector against 4 others.

Dot product of 64-byte vector (a) against 4 others (b0,b1,b2,b3):
<a|b0>, <a|b1>, <a|b2>, <a|b3> using their given inverse lengths for normalization.

Parameters:

a	Vector.
invLengthA	Inverse of the length of vector a.
b0	Vector.
b1	Vector.
b2	Vector.
b3	Vector.
invLengthsB	Pointer to an array of the inverse lengths of the vectors b0, b1, b2 and b3. The pointer must point to 4 floating point values. WARNING: array should be 128-bit aligned
dotProducts	Output of the 4 results { <a|b0>, <a|b1>, <a|b2>, <a|b3> }. WARNING: array should be 128-bit aligned

FASTCV_API void fcvDotProductNorm64x4u8	(	const uint8_t *__restrict	a,
		float	invLengthA,
		const uint8_t *__restrict	b0,
		const uint8_t *__restrict	b1,
		const uint8_t *__restrict	b2,
		const uint8_t *__restrict	b3,
		float *__restrict	invLengthsB,
		float *__restrict	dotProducts
	)

Normalized dot product of one 64-byte vector against 4 others.

Dot product of 64-byte vector (a) against 4 others (b0,b1,b2,b3):
<a|b0>, <a|b1>, <a|b2>, <a|b3> using their given inverse lengths for normalization.

Parameters:

a	Vector.
invLengthA	Inverse of the length of vector a.
b0	Vector.
b1	Vector.
b2	Vector.
b3	Vector.
invLengthsB	Pointer to an array of the inverse lengths of the vectors b0, b1, b2 and b3. The pointer must point to 4 floating point values. WARNING: array should be 128-bit aligned
dotProducts	Output of the 4 results { <a|b0>, <a|b1>, <a|b2>, <a|b3> }. WARNING: array should be 128-bit aligned

FASTCV_API int32_t fcvDotProducts8	(	const int8_t *__restrict	a,
		const int8_t *__restrict	b,
		unsigned int	abSize
	)

Dot product of two 8-bit vectors.

Parameters:

a	Vector.
b	Vector.
abSize	Number of elements in A and B.

Returns:: Dot product <A|B>.

FASTCV_API uint32_t fcvDotProductu8	(	const uint8_t *__restrict	a,
		const uint8_t *__restrict	b,
		unsigned int	abSize
	)

Dot product of two 8-bit vectors.

Parameters:

a	Vector A.
b	Vector B.
abSize	Number of elements in A and B.

Returns:: Dot product <A|B>.

FASTCV_API void fcvElementMultiplyf32	(	const float32_t *	src1,
		uint32_t	width,
		uint32_t	height,
		uint32_t	src1Stride,
		const float32_t *	src2,
		uint32_t	src2Stride,
		float32_t *__restrict	dst,
		uint32_t	dstStride
	)

Element-wise multiplication of two float32_t type matrices.

Parameters:

src1	First source matrix. NOTE: array should be 128-bit aligned
width	Width of the source matrix.
height	Height of the source matrix.
src1Stride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 src1Stride is default to width*4. WARNING: should be multiple of 8
src2	Second source matrix. NOTE: array should be 128-bit aligned
src2Stride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 src2Stride is default to width*4. WARNING: should be multiple of 8
dst	the result matrix (float32_t type). NOTE: array should be 128-bit aligned
dstStride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 dstStride is default to width*4. WARNING: should be multiple of 8

FASTCV_API fcvStatus fcvElementMultiplys16	(	const int16_t *	src1,
		uint32_t	width,
		uint32_t	height,
		uint32_t	src1Stride,
		const int16_t *	src2,
		uint32_t	src2Stride,
		int8_t	scaleFactor,
		fcvConvertPolicy	policy,
		int16_t *__restrict	dst,
		uint32_t	dstStride
	)

Element-wise multiplication of two int16_t type matrices.

Parameters:

src1	First source matrix. NOTE: array should be 128-bit aligned
width	Width of the source matrix.
height	Height of the source matrix.
src1Stride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 src1Stride is default to width*2. WARNING: should be multiple of 8
src2	Second source matrix. NOTE: array should be 128-bit aligned
src2Stride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 src2Stride is default to width*2. WARNING: should be multiple of 8
scaleFactor	The number of bits to be shifted to scale the result. When scaleFactor > 0, results are right-shifted; when scaleFactor < 0, results are left-shifted by absolute value of scaleFactor
policy	Conversion policy that decides how data overflow should be handled
dst	the result matrix (int16_t type). NOTE: array should be 128-bit aligned
dstStride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 dstStride is default to width*2. WARNING: should be multiple of 8

Returns:: FASTCV_SUCCESS upon success, other values upon failure.

FASTCV_API fcvStatus fcvElementMultiplyu8	(	const uint8_t *	src1,
		uint32_t	width,
		uint32_t	height,
		uint32_t	src1Stride,
		const uint8_t *	src2,
		uint32_t	src2Stride,
		int8_t	scaleFactor,
		fcvConvertPolicy	policy,
		uint8_t *__restrict	dst,
		uint32_t	dstStride
	)

Element-wise multiplication of two uint8_t type matrices.

Parameters:

src1	First source matrix. NOTE: array should be 128-bit aligned
width	Width of the source matrix.
height	Height of the source matrix.
src1Stride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 src1Stride is default to width. WARNING: should be multiple of 8
src2	Second source matrix. NOTE: array should be 128-bit aligned
src2Stride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 src2Stride is default to width. WARNING: should be multiple of 8
scaleFactor	The number of bits to be shifted to scale the result. When scaleFactor > 0, results are right-shifted; when scaleFactor < 0, results are left-shifted by absolute value of scaleFactor
policy	Conversion policy that decides how data overflow should be handled
dst	the result matrix (uint8_t type). NOTE: array should be 128-bit aligned
dstStride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 dstStride is default to width. WARNING: should be multiple of 8

Returns:: FASTCV_SUCCESS upon success, other values upon failure.

FASTCV_API fcvStatus fcvElementMultiplyu8s16	(	const uint8_t *	src1,
		uint32_t	width,
		uint32_t	height,
		uint32_t	src1Stride,
		const uint8_t *	src2,
		uint32_t	src2Stride,
		int8_t	scaleFactor,
		fcvConvertPolicy	policy,
		int16_t *__restrict	dst,
		uint32_t	dstStride
	)

Element-wise multiplication of two uint8_t type matrices.

Parameters:

src1	First source matrix. NOTE: array should be 128-bit aligned
width	Width of the source matrix.
height	Height of the source matrix.
src1Stride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 src1Stride is default to width. WARNING: should be multiple of 8
src2	Second source matrix. NOTE: array should be 128-bit aligned
src2Stride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 src2Stride is default to width. WARNING: should be multiple of 8
scaleFactor	The number of bits to be shifted to scale the result. When scaleFactor > 0, results are right-shifted; when scaleFactor < 0, results are left-shifted by absolute value of scaleFactor
policy	Conversion policy that decides how data overflow should be handled
dst	the result matrix (int16_t type). NOTE: array should be 128-bit aligned
dstStride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 dstStride is default to width*2. WARNING: should be multiple of 8

Returns:: FASTCV_SUCCESS upon success, other values upon failure.

FASTCV_API void fcvElementMultiplyu8u16	(	const uint8_t *	src1,
		uint32_t	width,
		uint32_t	height,
		uint32_t	src1Stride,
		const uint8_t *	src2,
		uint32_t	src2Stride,
		uint16_t *__restrict	dst,
		uint32_t	dstStride
	)

Element-wise multiplication of two uint8_t type matrices.

Parameters:

src1	First source matrix. NOTE: array should be 128-bit aligned
width	Width of the source matrix.
height	Height of the source matrix.
src1Stride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 src1Stride is default to width. WARNING: should be multiple of 8
src2	Second source matrix. NOTE: array should be 128-bit aligned
src2Stride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 src2Stride is default to width. WARNING: should be multiple of 8
dst	the result matrix (uint16_t type). NOTE: array should be 128-bit aligned
dstStride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 dstStride is default to width*2. WARNING: should be multiple of 8

FASTCV_API fcvStatus fcvFlipRGB888u8	(	const uint8_t *	src,
		uint32_t	srcWidth,
		uint32_t	srcHeight,
		uint32_t	srcStride,
		uint8_t *	dst,
		uint32_t	dstStride,
		fcvFlipDir	dir
	)

Flips an interleaved RGB image.

Flips one uint8_t type interleaved RGB image. If src and dst point to the same address and srcStride equals to dstStride, it will do in-place flip.

Parameters:

src	Input unsigned 8-bit integer image. Size of buffer is srcStride*srcHeight bytes. WARNING: should be 128-bit aligned.
srcWidth	Width of the image.
srcHeight	Height of the source image.
srcStride	Stride of the Image in bytes. NOTE: if 0, srcStride is set as 3 x srcWidth. WARNING: should be multiple of 8, and at least as much as 3 x srcWidth if not 0.
dst	Output unsigned 8-bit integer image. Size of buffer is dstStride*srcHeight bytes. If src equals to dst and srcStride equals to dstStride, it will do in-place flip. WARNING: should be 128-bit aligned.
dstStride	Stride of the output image in bytes. NOTE: if 0, dstStride is set as 3 x srcWidth. WARNING: should be multiple of 8, and at least as much as 3 x srcWidth if not 0.
dir	Flip direction (FASTCV_FLIP_HORIZ, FASTCV_FLIP_VERT or FASTCV_FLIP_BOTH).

FASTCV_API void fcvFlipu16	(	const uint16_t *	src,
		uint32_t	srcWidth,
		uint32_t	srcHeight,
		uint32_t	srcStride,
		uint16_t *	dst,
		uint32_t	dstStride,
		fcvFlipDir	dir
	)

Flip one uint16_t type matrix. If src and dst point to the same address, and srcStride equals to dstStride, it will do in-place flip.

Parameters:

src	Source matrix. NOTE: array should be 128-bit aligned
srcWidth	Width of the source matrix.
srcHeight	Height of the source matrix.
srcStride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 srcStride is default to srcWidth*2. WARNING: should be multiple of 8
dst	the result matrix. If src equals to dst and srcStride equals to dstStride, it will do in-place flip. NOTE: array should be 128-bit aligned
dstStride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 dstStride is default to srcWidth*2. WARNING: should be multiple of 8
dir	Flip direction (FASTCV_FLIP_HORIZ, FASTCV_FLIP_VERT or FASTCV_FLIP_BOTH).

FASTCV_API void fcvFlipu8	(	const uint8_t *	src,
		uint32_t	srcWidth,
		uint32_t	srcHeight,
		uint32_t	srcStride,
		uint8_t *	dst,
		uint32_t	dstStride,
		fcvFlipDir	dir
	)

Flip one uint8_t type matrix. If src and dst point to the same address and srcStride equals to dstStride, it will do in-place flip.

Parameters:

src	Source matrix. NOTE: array should be 128-bit aligned
srcWidth	Width of the source matrix.
srcHeight	Height of the source matrix.
srcStride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 srcStride is default to srcWidth. WARNING: should be multiple of 8
dst	the result matrix. If src equals to dst and srcStride equals to dstStride, it will do in-place flip. NOTE: array should be 128-bit aligned
dstStride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 dstStride is default to srcWidth. WARNING: should be multiple of 8
dir	Flip direction (FASTCV_FLIP_HORIZ, FASTCV_FLIP_VERT or FASTCV_FLIP_BOTH).

FASTCV_API uint32_t fcvHammingDistance32x1u8	(	const uint8_t *__restrict	a,
		const uint8_t *__restrict	b
	)

Computes the Hamming distance between the two supplied 32-byte vectors.

Parameters:

a	Pointer to 32-byte vector to compute distance.
b	Pointer to 32-byte vector to compute distance.

Returns:: Hamming distance between the two vectors.

FASTCV_API uint32_t fcvHammingDistance32x1u8a4	(	const uint8_t *__restrict	a,
		const uint8_t *__restrict	b
	)

Computes the Hamming distance between the two supplied 32-byte vectors.

Parameters:

a	Pointer to 32-byte vector to compute distance. WARNING: must be 32-bit aligned
b	Pointer to 32-byte vector to compute distance. WARNING: must be 32-bit aligned

Returns:: Hamming distance between the two vectors.

FASTCV_API void fcvHammingDistance32x4u8a4	(	const uint8_t *__restrict	a,
		const uint8_t *__restrict	b,
		const uint8_t *__restrict	c,
		const uint8_t *__restrict	d,
		const uint8_t *__restrict	e,
		uint32_t *__restrict	hammingDistances
	)

Computes the Hamming distance between A and each of B,C,D,E 32-byte vectors.

Parameters:

a	Pointer to 32-byte vector to compute distance. WARNING: must be 32-bit aligned
b	Pointer to 32-byte vector to compute distance from A. WARNING: must be 32-bit aligned
c	Pointer to 32-byte vector to compute distance from A. WARNING: must be 32-bit aligned
d	Pointer to 32-byte vector to compute distance from A. WARNING: must be 32-bit aligned
e	Pointer to 32-byte vector to compute distance from A. WARNING: must be 32-bit aligned
hammingDistances	Array to store each Hamming distance between the vectors. WARNING: should be 128-bit aligned

FASTCV_API uint32_t fcvHammingDistance64x1u8	(	const uint8_t *__restrict	a,
		const uint8_t *__restrict	b
	)

Computes the Hamming distance between the two supplied 64-byte vectors.

Parameters:

a	Pointer to 64-byte vector to compute distance. WARNING: must be 32-bit aligned
b	Pointer to 64-byte vector to compute distance. WARNING: must be 32-bit aligned

Returns:: Hamming distance between the two vectors.

FASTCV_API uint32_t fcvHammingDistance64x1u8a4	(	const uint8_t *__restrict	a,
		const uint8_t *__restrict	b
	)

Computes the Hamming distance between the two supplied 64-byte vectors.

Parameters:

a	Pointer to 64-byte vector to compute distance. WARNING: must be 32-bit aligned
b	Pointer to 64-byte vector to compute distance. WARNING: must be 32-bit aligned

Returns:: Hamming distance between the two vectors.

FASTCV_API void fcvHammingDistance64x4u8	(	const uint8_t *__restrict	a,
		const uint8_t *__restrict	b,
		const uint8_t *__restrict	c,
		const uint8_t *__restrict	d,
		const uint8_t *__restrict	e,
		uint32_t *__restrict	hammingDistances
	)

Computes the Hamming distance between A and each of B,C,D,E 64-byte vectors.

Parameters:

a	Pointer to 64-byte vector to compute distance. WARNING: must be 32-bit aligned
b	Pointer to 64-byte vector to compute distance from A. WARNING: must be 32-bit aligned
c	Pointer to 64-byte vector to compute distance from A. WARNING: must be 32-bit aligned
d	Pointer to 64-byte vector to compute distance from A. WARNING: must be 32-bit aligned
e	Pointer to 64-byte vector to compute distance from A. WARNING: must be 32-bit aligned
hammingDistances	Array to store each Hamming distance between the vectors.

FASTCV_API void fcvHammingDistance64x4u8a4	(	const uint8_t *__restrict	a,
		const uint8_t *__restrict	b,
		const uint8_t *__restrict	c,
		const uint8_t *__restrict	d,
		const uint8_t *__restrict	e,
		uint32_t *__restrict	hammingDistances
	)

Computes the Hamming distance between A and each of B,C,D,E 64-byte vectors.

Parameters:

a	Pointer to 64-byte vector to compute distance. WARNING: must be 32-bit aligned
b	Pointer to 64-byte vector to compute distance from A. WARNING: must be 32-bit aligned
c	Pointer to 64-byte vector to compute distance from A. WARNING: must be 32-bit aligned
d	Pointer to 64-byte vector to compute distance from A. WARNING: must be 32-bit aligned
e	Pointer to 64-byte vector to compute distance from A. WARNING: must be 32-bit aligned
hammingDistances	Array to store each Hamming distance between the vectors. WARNING: should be 128-bit aligned

FASTCV_API uint32_t fcvHammingDistanceu8	(	const uint8_t *__restrict	a,
		const uint8_t *__restrict	b,
		unsigned int	abLength
	)

Computes the Hamming distance between the two supplied arbitrary length vectors.

Parameters:

a	Pointer to vector to compute distance.
b	Pointer to vector to compute distance.
abLength	Length in bits of each of the vectors. Assumed that the remainder of bits modulo 8 will be set to 0 a priori.

Returns:: Hamming distance between the two vectors.

FASTCV_API void fcvJacobianSE2f32	(	const uint8_t *__restrict	warpedImage,
		const uint16_t *__restrict	warpedBorder,
		const uint8_t *__restrict	targetImage,
		const int16_t *__restrict	targetDX,
		const int16_t *__restrict	targetDY,
		uint32_t	width,
		uint32_t	height,
		uint32_t	stride,
		float32_t *__restrict	sumJTJ,
		float32_t *__restrict	sumJTE,
		float32_t *__restrict	sumError,
		uint32_t *__restrict	numPixels
	)

Calculates JTJ, JTE and the sum absolute, normalized pixel differences for a target image and a reference image of same size for an SE2 image motion model. Since gradients are required for this algorithm all border pixels in referenceImage and targetImage are ignored.
NOTE: Only works for images with even width and height.

Parameters:

warpedImage	Grayscale 8-bit image. NOTE: should be 128-bit aligned.
warpedBorder	Array with the x-coordinates of left-most and right-most pixels for each scanline to consider in warpedImage. Format is l0,r0,l1,r1,l2,... where l_ and r_ are the left-most and right-most pixel coordinates for a scanline. NOTE: should be 128-bit aligned.
targetImage	Grayscale 8-bit image. NOTE: should be 128-bit aligned.
targetDX	X-gradients of the target image as 16-bit signed integers. NOTE: share same width, height and stride as targetImage. Stride is in units of pixels instead of bytes for targetDX. NOTE: should be 128-bit aligned.
targetDY	Y-gradients of the target image as 16-bit signed integers. NOTE: share same width, height and stride as targetImage. Stride is in units of pixels instead of bytes for targetDY. NOTE: should be 128-bit aligned.
width	Width of the reference image and target image. Must be even.
height	Height of the reference image and target image. Must be even.
stride	Stride (in bytes) of reference image and target image, is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 stride is default to width. NOTE: should be a multiple of 8.
sumJTJ	3x3 matrix (9 floats) receiving the sum of JTJ for all pixels. Only the upper half triangle matrix is filled. NOTE: should be 128-bit aligned.
sumJTE	3 vector (3 floats) receiving the sum of JTE for all pixels.
sumError	Sum of absolute, normalized pixel differences for all processed pixels (1 float).
numPixels	Number of pixels that have been processed (1 integer).

FASTCV_API void fcvMatrixMultiplyf32	(	const float32_t *__restrict	src1,
		uint32_t	src1Width,
		uint32_t	src1Height,
		uint32_t	src1Stride,
		const float32_t *__restrict	src2,
		uint32_t	src2Width,
		uint32_t	src2Stride,
		float32_t *__restrict	dst,
		uint32_t	dstStride
	)

Matrix multiplication of two float32_t type matrices.

Parameters:

src1	First source matrix. The size of src1 is src1Stride*src1Height. NOTE: array should be 128-bit aligned
src1Width	Width of the first source matrix.
src1Height	Height of the first source matrix.
src1Stride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 src1Stride is default to src1Width*4. WARNING: should be multiple of 8
src2	Second source matrix. The size of src2 is src2Stride*src1Width. NOTE: array should be 128-bit aligned
src2Width	Width of the second source matrix.
src2Stride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 src2Stride is default to src2Width*4. WARNING: should be multiple of 8
dst	the result matrix (float32_t type). The size of dst is dstStride*src1Height. NOTE: array should be 128-bit aligned
dstStride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 dstStride is default to src2Width*4. WARNING: should be multiple of 8

FASTCV_API void fcvMatrixMultiplys8s32	(	const int8_t *__restrict	src1,
		uint32_t	src1Width,
		uint32_t	src1Height,
		uint32_t	src1Stride,
		const int8_t *__restrict	src2,
		uint32_t	src2Width,
		uint32_t	src2Stride,
		int32_t *__restrict	dst,
		uint32_t	dstStride
	)

Matrix multiplication of two int8_t type matrices.

Parameters:

src1	First source matrix. The size of src1 is src1Stride*src1Height. NOTE: array should be 128-bit aligned
src1Width	Width of the first source matrix. NOTE: src1Width should not be larger than 131072
src1Height	Height of the first source matrix.
src1Stride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 src1Stride is default to src1Width. WARNING: should be multiple of 8
src2	Second source matrix. The size of src2 is src2Stride*src1Width. NOTE: array should be 128-bit aligned
src2Width	Width of the second source matrix.
src2Stride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 src2Stride is default to src2Width. WARNING: should be multiple of 8
dst	the result matrix (int32_t type). The size of dst is dstStride*src1Height. NOTE: array should be 128-bit aligned
dstStride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 dstStride is default to src2Width*4. WARNING: should be multiple of 8

FASTCV_API fcvStatus fcvMinMaxLocf32	(	const float32_t *__restrict	src,
		uint32_t	srcWidth,
		uint32_t	srcHeight,
		uint32_t	srcStride,
		float32_t *__restrict	minVal,
		float32_t *__restrict	maxVal,
		uint32_t *__restrict	minLocX,
		uint32_t *__restrict	minLocY,
		uint32_t *__restrict	maxLocX,
		uint32_t *__restrict	maxLocY
	)

Finds the minimum and maximum values, and their location in a matrix.

ATTENTION: This function's signature will become OBSOLETE in a future release of this library (2.0.0). The new interface is specified in the function: fcvMinMaxLocf32_v2(). In the 2.0.0 release, fcvMinMaxLocf32_v2 will be renamed to fcvMinMaxLocf32 and the signature of fcvMinMaxLocf32 as it appears now, will be removed.

Finds the minimum and maximum values in a matrix, and returns them. In addition to this, it also returns the location (x,y) of the minimum and maximum values thus found.

Parameters:

src	Input 32-bit floating point matrix. Size of buffer is srcStride*srcHeight bytes.
srcWidth	Width of the Matrix.
srcHeight	Height of the Matrix.
srcStride	Stride of the Matrix in bytes. NOTE: if 0, srcStride is set as srcWidth. WARNING: should be multiple of 8, and at least as much as srcWidth if not 0.
minVal	This variable stores the minimum value of the src matrix found by the function
maxVal	This variable stores the maximum value of the src matrix found by the function
minLocX	The X coordinate of the minimum value's location returned by the function
minLocY	The Y coordinate of the minimum value's location returned by the function
maxLocX	The X coordinate of the maximum value's location returned by the function
maxLocY	The Y coordinate of the maximum value's location returned by the function

Returns: FASTCV_SUCCESS upon success. Other status codes upon failure.

FASTCV_API fcvStatus fcvMinMaxLocf32_v2	(	const float32_t *__restrict	src,
		uint32_t	srcWidth,
		uint32_t	srcHeight,
		uint32_t	srcStride,
		float32_t *__restrict	minVal,
		float32_t *__restrict	maxVal,
		uint32_t *__restrict	minLocX,
		uint32_t *__restrict	minLocY,
		uint32_t *__restrict	maxLocX,
		uint32_t *__restrict	maxLocY,
		uint32_t *__restrict	minCount,
		uint32_t *__restrict	maxCount,
		uint32_t	nMinLocSize,
		uint32_t	nMaxLocSize
	)

Finds the minimum and maximum values, and their locations in a matrix.

ATTENTION: This function provides the new interface that will replace fcvMinMaxLocf32(). In the 2.0.0 release of this library, fcvMinMaxLocf32_v2 will be renamed to fcvMinMaxLocf32 and the signature of fcvMinMaxLocf32 as it appears now will be removed.

Finds the minimum and maximum values in a matrix, and returns them. In addition to this, it also returns the location (x,y) of the minimum and maximum values thus found. If there are multiple minima/maxima, the function returns them all. If the number of minima and maxima are greater than the capacity of the minLoc and maxLoc arrays, then the function returns as many locations as the capacity of these arrays.

Parameters:

src	Input 32-bit floating point matrix. Size of buffer is srcStride*srcHeight bytes. WARNING: should be 128-bit aligned.
srcWidth	Width of the Matrix.
srcHeight	Height of the Matrix.
srcStride	Stride of the Matrix in bytes. NOTE: if 0, srcStride is set as srcWidth. WARNING: should be multiple of 8, and at least as much as srcWidth*sizeof(float32_t) if not 0.
minVal	This variable stores the minimum value of the src matrix found by the function
maxVal	This variable stores the maximum value of the src matrix found by the function
minLocX	An array of X coordinates of the minimum value's location returned by the function. Must have nMinLocSize elements, i.e., allocated as nMinLocSize*sizeof(uint32_t)
minLocY	An array of Y coordinates of the minimum value's location returned by the function. Must have nMinLocSize elements, i.e., allocated as nMinLocSize*sizeof(uint32_t)
maxLocX	An array of X coordinates of the maximum value's location returned by the function. Must have nMaxLocSize elements, i.e., allocated as nMaxLocSize*sizeof(uint32_t)
maxLocY	An array of Y coordinates of the maximum value's location returned by the function. Must have nMaxLocSize elements, i.e., allocated as nMaxLocSize*sizeof(uint32_t)
minCount	The number of minima found by the function
maxCount	The number of maxima found by the function
nMinLocSize	The maximum number of minima requested by the user to be found in the input image src. The minLocX and minLocY arrays MUST be allocated to have at least nMinLocSize elements.
nMaxLocSize	The maximum number of maxima requested by the user to be found in the input image src. The maxLocX and maxLocY arrays MUST be allocated to have at least nMaxLocSize elements.

Returns: FASTCV_SUCCESS upon success. Other status codes upon failure.

FASTCV_API fcvStatus fcvMinMaxLocs16	(	const int16_t *__restrict	src,
		uint32_t	srcWidth,
		uint32_t	srcHeight,
		uint32_t	srcStride,
		int16_t *__restrict	minVal,
		int16_t *__restrict	maxVal,
		uint32_t *__restrict	minLocX,
		uint32_t *__restrict	minLocY,
		uint32_t *__restrict	maxLocX,
		uint32_t *__restrict	maxLocY
	)

Finds the minimum and maximum values, and their location in a matrix.

ATTENTION: This function's signature will become OBSOLETE in a future release of this library (2.0.0). The new interface is specified in the function: fcvMinMaxLocs16_v2(). In the 2.0.0 release, fcvMinMaxLocs16_v2 will be renamed to fcvMinMaxLocs16 and the signature of fcvMinMaxLocs16 as it appears now, will be removed.

Finds the minimum and maximum values in a matrix, and returns them. In addition to this, it also returns the location (x,y) of the minimum and maximum values thus found.

Parameters:

src	Input signed 16-bit integer matrix. Size of buffer is srcStride*srcHeight bytes. WARNING: should be 128-bit aligned.
srcWidth	Width of the Matrix.
srcHeight	Height of the Matrix.
srcStride	Stride of the Matrix in bytes. NOTE: if 0, srcStride is set as srcWidth. WARNING: should be multiple of 8, and at least as much as srcWidth if not 0.
minVal	This variable stores the minimum value of the src matrix found by the function
maxVal	This variable stores the maximum value of the src matrix found by the function
minLocX	The X coordinate of the minimum value's location returned by the function
minLocY	The Y coordinate of the minimum value's location returned by the function
maxLocX	The X coordinate of the maximum value's location returned by the function
maxLocY	The Y coordinate of the maximum value's location returned by the function

Returns: FASTCV_SUCCESS upon success. Other status codes upon failure.

FASTCV_API fcvStatus fcvMinMaxLocs16_v2	(	const int16_t *__restrict	src,
		uint32_t	srcWidth,
		uint32_t	srcHeight,
		uint32_t	srcStride,
		int16_t *__restrict	minVal,
		int16_t *__restrict	maxVal,
		uint32_t *__restrict	minLocX,
		uint32_t *__restrict	minLocY,
		uint32_t *__restrict	maxLocX,
		uint32_t *__restrict	maxLocY,
		uint32_t *__restrict	minCount,
		uint32_t *__restrict	maxCount,
		uint32_t	nMinLocSize,
		uint32_t	nMaxLocSize
	)

Finds the minimum and maximum values, and their locations in a matrix.

ATTENTION: This function provides the new interface that will replace fcvMinMaxLocs16(). In the 2.0.0 release of this library, fcvMinMaxLocs16_v2 will be renamed to fcvMinMaxLocs16 and the signature of fcvMinMaxLocs16 as it appears now will be removed.

Finds the minimum and maximum values in a matrix, and returns them. In addition to this, it also returns the location (x,y) of the minimum and maximum values thus found. If there are multiple minima/maxima, the function returns them all. If the number of minima and maxima are greater than the capacity of the minLoc and maxLoc arrays, then the function returns as many locations as the capacity of these arrays.

Parameters:

src	Input signed 16-bit integer matrix. Size of buffer is srcStride*srcHeight bytes. WARNING: should be 128-bit aligned.
srcWidth	Width of the Matrix.
srcHeight	Height of the Matrix.
srcStride	Stride of the Matrix in bytes. NOTE: if 0, srcStride is set as srcWidth. WARNING: should be multiple of 8, and at least as much as srcWidth*sizeof(int16_t) if not 0.
minVal	This variable stores the minimum value of the src matrix found by the function
maxVal	This variable stores the maximum value of the src matrix found by the function
minLocX	An array of X coordinates of the minimum value's location returned by the function. Must have nMinLocSize elements, i.e., allocated as nMinLocSize*sizeof(uint32_t)
minLocY	An array of Y coordinates of the minimum value's location returned by the function. Must have nMinLocSize elements, i.e., allocated as nMinLocSize*sizeof(uint32_t)
maxLocX	An array of X coordinates of the maximum value's location returned by the function. Must have nMaxLocSize elements, i.e., allocated as nMaxLocSize*sizeof(uint32_t)
maxLocY	An array of Y coordinates of the maximum value's location returned by the function. Must have nMaxLocSize elements, i.e., allocated as nMaxLocSize*sizeof(uint32_t)
minCount	The number of minima found by the function
maxCount	The number of maxima found by the function
nMinLocSize	The maximum number of minima requested by the user to be found in the input image src. The minLocX and minLocY arrays MUST be allocated to have at least nMinLocSize elements.
nMaxLocSize	The maximum number of maxima requested by the user to be found in the input image src. The maxLocX and maxLocY arrays MUST be allocated to have at least nMaxLocSize elements.

Returns: FASTCV_SUCCESS upon success. Other status codes upon failure.

FASTCV_API fcvStatus fcvMinMaxLocs32	(	const int32_t *__restrict	src,
		uint32_t	srcWidth,
		uint32_t	srcHeight,
		uint32_t	srcStride,
		int32_t *__restrict	minVal,
		int32_t *__restrict	maxVal,
		uint32_t *__restrict	minLocX,
		uint32_t *__restrict	minLocY,
		uint32_t *__restrict	maxLocX,
		uint32_t *__restrict	maxLocY
	)

Finds the minimum and maximum values, and their location in a matrix.

ATTENTION: This function's signature will become OBSOLETE in a future release of this library (2.0.0). The new interface is specified in the function: fcvMinMaxLocs32_v2(). In the 2.0.0 release, fcvMinMaxLocs32_v2 will be renamed to fcvMinMaxLocs32 and the signature of fcvMinMaxLocs32 as it appears now, will be removed.

Finds the minimum and maximum values in a matrix, and returns them. In addition to this, it also returns the location (x,y) of the minimum and maximum values thus found.

Parameters:

src	Input signed 32-bit integer matrix. Size of buffer is srcStride*srcHeight bytes. WARNING: should be 128-bit aligned.
srcWidth	Width of the Matrix.
srcHeight	Height of the Matrix.
srcStride	Stride of the Matrix in bytes. NOTE: if 0, srcStride is set as srcWidth. WARNING: should be multiple of 8, and at least as much as srcWidth if not 0.
minVal	This variable stores the minimum value of the src matrix found by the function
maxVal	This variable stores the maximum value of the src matrix found by the function
minLocX	The X coordinate of the minimum value's location returned by the function
minLocY	The Y coordinate of the minimum value's location returned by the function
maxLocX	The X coordinate of the maximum value's location returned by the function
maxLocY	The Y coordinate of the maximum value's location returned by the function

Returns: FASTCV_SUCCESS upon success. Other status codes upon failure.

FASTCV_API fcvStatus fcvMinMaxLocs32_v2	(	const int32_t *__restrict	src,
		uint32_t	srcWidth,
		uint32_t	srcHeight,
		uint32_t	srcStride,
		int32_t *__restrict	minVal,
		int32_t *__restrict	maxVal,
		uint32_t *__restrict	minLocX,
		uint32_t *__restrict	minLocY,
		uint32_t *__restrict	maxLocX,
		uint32_t *__restrict	maxLocY,
		uint32_t *__restrict	minCount,
		uint32_t *__restrict	maxCount,
		uint32_t	nMinLocSize,
		uint32_t	nMaxLocSize
	)

Finds the minimum and maximum values, and their locations in a matrix.

ATTENTION: This function provides the new interface that will replace fcvMinMaxLocs32(). In the 2.0.0 release of this library, fcvMinMaxLocs32_v2 will be renamed to fcvMinMaxLocs32 and the signature of fcvMinMaxLocs32 as it appears now will be removed.

Finds the minimum and maximum values in a matrix, and returns them. In addition to this, it also returns the location (x,y) of the minimum and maximum values thus found. If there are multiple minima/maxima, the function returns them all. If the number of minima and maxima are greater than the capacity of the minLoc and maxLoc arrays, then the function returns as many locations as the capacity of these arrays.

Parameters:

src	Input signed 32-bit integer matrix. Size of buffer is srcStride*srcHeight bytes. WARNING: should be 128-bit aligned.
srcWidth	Width of the Matrix.
srcHeight	Height of the Matrix.
srcStride	Stride of the Matrix in bytes. NOTE: if 0, srcStride is set as srcWidth. WARNING: should be multiple of 8, and at least as much as srcWidth*sizeof(int32_t) if not 0.
minVal	This variable stores the minimum value of the src matrix found by the function
maxVal	This variable stores the maximum value of the src matrix found by the function
minLocX	An array of X coordinates of the minimum value's location returned by the function. Must have nMinLocSize elements, i.e., allocated as nMinLocSize*sizeof(uint32_t)
minLocY	An array of Y coordinates of the minimum value's location returned by the function. Must have nMinLocSize elements, i.e., allocated as nMinLocSize*sizeof(uint32_t)
maxLocX	An array of X coordinates of the maximum value's location returned by the function. Must have nMaxLocSize elements, i.e., allocated as nMaxLocSize*sizeof(uint32_t)
maxLocY	An array of Y coordinates of the maximum value's location returned by the function. Must have nMaxLocSize elements, i.e., allocated as nMaxLocSize*sizeof(uint32_t)
minCount	The number of minima found by the function
maxCount	The number of maxima found by the function
nMinLocSize	The maximum number of minima requested by the user to be found in the input image src. The minLocX and minLocY arrays MUST be allocated to have at least nMinLocSize elements.
nMaxLocSize	The maximum number of maxima requested by the user to be found in the input image src. The maxLocX and maxLocY arrays MUST be allocated to have at least nMaxLocSize elements.

Returns: FASTCV_SUCCESS upon success. Other status codes upon failure.

FASTCV_API fcvStatus fcvMinMaxLocu16	(	const uint16_t *__restrict	src,
		uint32_t	srcWidth,
		uint32_t	srcHeight,
		uint32_t	srcStride,
		uint16_t *__restrict	minVal,
		uint16_t *__restrict	maxVal,
		uint32_t *__restrict	minLocX,
		uint32_t *__restrict	minLocY,
		uint32_t *__restrict	maxLocX,
		uint32_t *__restrict	maxLocY
	)

Finds the minimum and maximum values, and their location in a matrix.

ATTENTION: This function's signature will become OBSOLETE in a future release of this library (2.0.0). The new interface is specified in the function: fcvMinMaxLocu16_v2(). In the 2.0.0 release, fcvMinMaxLocu16_v2 will be renamed to fcvMinMaxLocu16 and the signature of fcvMinMaxLocu16 as it appears now, will be removed.

Finds the minimum and maximum values in a matrix, and returns them. In addition to this, it also returns the location (x,y) of the minimum and maximum values thus found.

Parameters:

src	Input unsigned 16-bit integer matrix. Size of buffer is srcStride*srcHeight bytes. WARNING: should be 128-bit aligned.
srcWidth	Width of the Matrix.
srcHeight	Height of the Matrix.
srcStride	Stride of the Matrix in bytes. NOTE: if 0, srcStride is set as srcWidth. WARNING: should be multiple of 8, and at least as much as srcWidth if not 0.
minVal	This variable stores the minimum value of the src matrix found by the function
maxVal	This variable stores the maximum value of the src matrix found by the function
minLocX	The X coordinate of the minimum value's location returned by the function
minLocY	The Y coordinate of the minimum value's location returned by the function
maxLocX	The X coordinate of the maximum value's location returned by the function
maxLocY	The Y coordinate of the maximum value's location returned by the function

Returns: FASTCV_SUCCESS upon success. Other status codes upon failure.

FASTCV_API fcvStatus fcvMinMaxLocu16_v2	(	const uint16_t *__restrict	src,
		uint32_t	srcWidth,
		uint32_t	srcHeight,
		uint32_t	srcStride,
		uint16_t *__restrict	minVal,
		uint16_t *__restrict	maxVal,
		uint32_t *__restrict	minLocX,
		uint32_t *__restrict	minLocY,
		uint32_t *__restrict	maxLocX,
		uint32_t *__restrict	maxLocY,
		uint32_t *__restrict	minCount,
		uint32_t *__restrict	maxCount,
		uint32_t	nMinLocSize,
		uint32_t	nMaxLocSize
	)

Finds the minimum and maximum values, and their locations in a matrix.

ATTENTION: This function provides the new interface that will replace fcvMinMaxLocu16(). In the 2.0.0 release of this library, fcvMinMaxLocu16_v2 will be renamed to fcvMinMaxLocu16 and the signature of fcvMinMaxLocu16 as it appears now will be removed.

Finds the minimum and maximum values in a matrix, and returns them. In addition to this, it also returns the location (x,y) of the minimum and maximum values thus found. If there are multiple minima/maxima, the function returns them all. If the number of minima and maxima are greater than the capacity of the minLoc and maxLoc arrays, then the function returns as many locations as the capacity of these arrays.

Parameters:

src	Input unsigned 16-bit integer matrix. Size of buffer is srcStride*srcHeight bytes. WARNING: should be 128-bit aligned.
srcWidth	Width of the Matrix.
srcHeight	Height of the Matrix.
srcStride	Stride of the Matrix in bytes. NOTE: if 0, srcStride is set as srcWidth. WARNING: should be multiple of 8, and at least as much as srcWidth*sizeof(uint16_t) if not 0.
minVal	This variable stores the minimum value of the src matrix found by the function
maxVal	This variable stores the maximum value of the src matrix found by the function
minLocX	An array of X coordinates of the minimum value's location returned by the function. Must have nMinLocSize elements, i.e., allocated as nMinLocSize*sizeof(uint32_t)
minLocY	An array of Y coordinates of the minimum value's location returned by the function. Must have nMinLocSize elements, i.e., allocated as nMinLocSize*sizeof(uint32_t)
maxLocX	An array of X coordinates of the maximum value's location returned by the function. Must have nMaxLocSize elements, i.e., allocated as nMaxLocSize*sizeof(uint32_t)
maxLocY	An array of Y coordinates of the maximum value's location returned by the function. Must have nMaxLocSize elements, i.e., allocated as nMaxLocSize*sizeof(uint32_t)
minCount	The number of minima found by the function
maxCount	The number of maxima found by the function
nMinLocSize	The maximum number of minima requested by the user to be found in the input image src. The minLocX and minLocY arrays MUST be allocated to have at least nMinLocSize elements.
nMaxLocSize	The maximum number of maxima requested by the user to be found in the input image src. The maxLocX and maxLocY arrays MUST be allocated to have at least nMaxLocSize elements.

Returns: FASTCV_SUCCESS upon success. Other status codes upon failure.

FASTCV_API fcvStatus fcvMinMaxLocu32	(	const uint32_t *__restrict	src,
		uint32_t	srcWidth,
		uint32_t	srcHeight,
		uint32_t	srcStride,
		uint32_t *__restrict	minVal,
		uint32_t *__restrict	maxVal,
		uint32_t *__restrict	minLocX,
		uint32_t *__restrict	minLocY,
		uint32_t *__restrict	maxLocX,
		uint32_t *__restrict	maxLocY
	)

Finds the minimum and maximum values, and their location in a matrix.

ATTENTION: This function's signature will become OBSOLETE in a future release of this library (2.0.0). The new interface is specified in the function: fcvMinMaxLocu32_v2(). In the 2.0.0 release, fcvMinMaxLocu32_v2 will be renamed to fcvMinMaxLocu32 and the signature of fcvMinMaxLocu32 as it appears now, will be removed.

Finds the minimum and maximum values in a matrix, and returns them. In addition to this, it also returns the location (x,y) of the minimum and maximum values thus found.

Parameters:

src	Input unsigned 32-bit integer matrix. Size of buffer is srcStride*srcHeight bytes. WARNING: should be 128-bit aligned.
srcWidth	Width of the Matrix.
srcHeight	Height of the Matrix.
srcStride	Stride of the Matrix in bytes. NOTE: if 0, srcStride is set as srcWidth. WARNING: should be multiple of 8, and at least as much as srcWidth if not 0.
minVal	This variable stores the minimum value of the src matrix found by the function
maxVal	This variable stores the maximum value of the src matrix found by the function
minLocX	The X coordinate of the minimum value's location returned by the function
minLocY	The Y coordinate of the minimum value's location returned by the function
maxLocX	The X coordinate of the maximum value's location returned by the function
maxLocY	The Y coordinate of the maximum value's location returned by the function

Returns: FASTCV_SUCCESS upon success. Other status codes upon failure.

FASTCV_API fcvStatus fcvMinMaxLocu32_v2	(	const uint32_t *__restrict	src,
		uint32_t	srcWidth,
		uint32_t	srcHeight,
		uint32_t	srcStride,
		uint32_t *__restrict	minVal,
		uint32_t *__restrict	maxVal,
		uint32_t *__restrict	minLocX,
		uint32_t *__restrict	minLocY,
		uint32_t *__restrict	maxLocX,
		uint32_t *__restrict	maxLocY,
		uint32_t *__restrict	minCount,
		uint32_t *__restrict	maxCount,
		uint32_t	nMinLocSize,
		uint32_t	nMaxLocSize
	)

Finds the minimum and maximum values, and their locations in a matrix.

ATTENTION: This function provides the new interface that will replace fcvMinMaxLocu32(). In the 2.0.0 release of this library, fcvMinMaxLocu32_v2 will be renamed to fcvMinMaxLocu32 and the signature of fcvMinMaxLocu32 as it appears now will be removed.

Finds the minimum and maximum values in a matrix, and returns them. In addition to this, it also returns the location (x,y) of the minimum and maximum values thus found. If there are multiple minima/maxima, the function returns them all. If the number of minima and maxima are greater than the capacity of the minLoc and maxLoc arrays, then the function returns as many locations as the capacity of these arrays.

Parameters:

src	Input unsigned 32-bit integer matrix. Size of buffer is srcStride*srcHeight bytes. WARNING: should be 128-bit aligned.
srcWidth	Width of the Matrix.
srcHeight	Height of the Matrix.
srcStride	Stride of the Matrix in bytes. NOTE: if 0, srcStride is set as srcWidth. WARNING: should be multiple of 8, and at least as much as srcWidth*sizeof(uint32_t) if not 0.
minVal	This variable stores the minimum value of the src matrix found by the function
maxVal	This variable stores the maximum value of the src matrix found by the function
minLocX	An array of X coordinates of the minimum value's location returned by the function. Must have nMinLocSize elements, i.e., allocated as nMinLocSize*sizeof(uint32_t)
minLocY	An array of Y coordinates of the minimum value's location returned by the function. Must have nMinLocSize elements, i.e., allocated as nMinLocSize*sizeof(uint32_t)
maxLocX	An array of X coordinates of the maximum value's location returned by the function. Must have nMaxLocSize elements, i.e., allocated as nMaxLocSize*sizeof(uint32_t)
maxLocY	An array of Y coordinates of the maximum value's location returned by the function. Must have nMaxLocSize elements, i.e., allocated as nMaxLocSize*sizeof(uint32_t)
minCount	The number of minima found by the function
maxCount	The number of maxima found by the function
nMinLocSize	The maximum number of minima requested by the user to be found in the input image src. The minLocX and minLocY arrays MUST be allocated to have at least nMinLocSize elements.
nMaxLocSize	The maximum number of maxima requested by the user to be found in the input image src. The maxLocX and maxLocY arrays MUST be allocated to have at least nMaxLocSize elements.

Returns: FASTCV_SUCCESS upon success. Other status codes upon failure.

FASTCV_API fcvStatus fcvMinMaxLocu8	(	const uint8_t *__restrict	src,
		uint32_t	srcWidth,
		uint32_t	srcHeight,
		uint32_t	srcStride,
		uint8_t *__restrict	minVal,
		uint8_t *__restrict	maxVal,
		uint32_t *__restrict	minLocX,
		uint32_t *__restrict	minLocY,
		uint32_t *__restrict	maxLocX,
		uint32_t *__restrict	maxLocY
	)

Finds the minimum and maximum values, and their location in a matrix.

ATTENTION: This function's signature will become OBSOLETE in a future release of this library (2.0.0). The new interface is specified in the function: fcvMinMaxLocu8_v2(). In the 2.0.0 release, fcvMinMaxLocu8_v2 will be renamed to fcvMinMaxLocu8 and the signature of fcvMinMaxLocu8 as it appears now, will be removed.

Finds the minimum and maximum values in a matrix, and returns them. In addition to this, it also returns the location (x,y) of the minimum and maximum values thus found.

Parameters:

src	Input unsigned 8-bit integer matrix. Size of buffer is srcStride*srcHeight bytes. WARNING: should be 128-bit aligned.
srcWidth	Width of the Matrix.
srcHeight	Height of the Matrix.
srcStride	Stride of the Matrix in bytes. NOTE: if 0, srcStride is set as srcWidth. WARNING: should be multiple of 8, and at least as much as srcWidth if not 0.
minVal	This variable stores the minimum value of the src matrix found by the function
maxVal	This variable stores the maximum value of the src matrix found by the function
minLocX	The X coordinate of the minimum value's location returned by the function
minLocY	The Y coordinate of the minimum value's location returned by the function
maxLocX	The X coordinate of the maximum value's location returned by the function
maxLocY	The Y coordinate of the maximum value's location returned by the function

Returns: FASTCV_SUCCESS upon success. Other status codes upon failure.

FASTCV_API fcvStatus fcvMinMaxLocu8_v2	(	const uint8_t *__restrict	src,
		uint32_t	srcWidth,
		uint32_t	srcHeight,
		uint32_t	srcStride,
		uint8_t *__restrict	minVal,
		uint8_t *__restrict	maxVal,
		uint32_t *__restrict	minLocX,
		uint32_t *__restrict	minLocY,
		uint32_t *__restrict	maxLocX,
		uint32_t *__restrict	maxLocY,
		uint32_t *__restrict	minCount,
		uint32_t *__restrict	maxCount,
		uint32_t	nMinLocSize,
		uint32_t	nMaxLocSize
	)

Finds the minimum and maximum values, and their locations in a matrix.

ATTENTION: This function provides the new interface that will replace fcvMinMaxLocu8(). In the 2.0.0 release of this library, fcvMinMaxLocu8_v2 will be renamed to fcvMinMaxLocu8 and the signature of fcvMinMaxLocu8 as it appears now will be removed.

Finds the minimum and maximum values in a matrix, and returns them. In addition to this, it also returns the location (x,y) of the minimum and maximum values thus found. If there are multiple minima/maxima, the function returns them all. If the number of minima and maxima are greater than the capacity of the minLoc and maxLoc arrays, then the function returns as many locations as the capacity of these arrays.

Parameters:

src	Input unsigned 8-bit integer matrix. Size of buffer is srcStride*srcHeight bytes. WARNING: should be 128-bit aligned.
srcWidth	Width of the Matrix.
srcHeight	Height of the Matrix.
srcStride	Stride of the Matrix in bytes. NOTE: if 0, srcStride is set as srcWidth. WARNING: should be multiple of 8, and at least as much as srcWidth*sizeof(uint8_t) if not 0.
minVal	This variable stores the minimum value of the src matrix found by the function
maxVal	This variable stores the maximum value of the src matrix found by the function
minLocX	An array of X coordinates of the minimum value's location returned by the function. Must have nMinLocSize elements, i.e., allocated as nMinLocSize*sizeof(uint32_t)
minLocY	An array of Y coordinates of the minimum value's location returned by the function. Must have nMinLocSize elements, i.e., allocated as nMinLocSize*sizeof(uint32_t)
maxLocX	An array of X coordinates of the maximum value's location returned by the function. Must have nMaxLocSize elements, i.e., allocated as nMaxLocSize*sizeof(uint32_t)
maxLocY	An array of Y coordinates of the maximum value's location returned by the function. Must have nMaxLocSize elements, i.e., allocated as nMaxLocSize*sizeof(uint32_t)
minCount	The number of minima found by the function
maxCount	The number of maxima found by the function
nMinLocSize	The maximum number of minima requested by the user to be found in the input image src. The minLocX and minLocY arrays MUST be allocated to have at least nMinLocSize elements.
nMaxLocSize	The maximum number of maxima requested by the user to be found in the input image src. The maxLocX and maxLocY arrays MUST be allocated to have at least nMaxLocSize elements.

Returns: FASTCV_SUCCESS upon success. Other status codes upon failure.

FASTCV_API fcvStatus fcvMultiplyScalarf32	(	const float32_t *__restrict	src,
		uint32_t	srcWidth,
		uint32_t	srcHeight,
		uint32_t	srcStride,
		float32_t	scalar,
		float32_t *__restrict	dst,
		uint32_t	dstStride
	)

Multiplies a scalar value to every element of a Matrix.

Multiplies a floating point scalar value to each element of the source Matrix and stores the result of the multiplication in the corresponding element of the destination matrix.

Parameters:

src	Input floating point matrix. Size of buffer is srcStride*srcHeight bytes.
srcWidth	Width of the Matrix.
srcHeight	Height of the Matrix.
srcStride	Stride of the Matrix in bytes. NOTE: if 0, srcStride is set as srcWidth. WARNING: should be multiple of 8, and at least as much as srcWidth if not 0.
scalar	The floating point scalar to be multiplied to the source matrix.
dst	Output 32-bit floating point matrix. Size of buffer is dstStride*srcHeight bytes.
dstStride	Stride of the output matrix in bytes. NOTE: if 0, dstStride is set as srcWidth. WARNING: should be multiple of 8, and at least as much as srcWidth if not 0.

Returns:: FASTCV_SUCCESS upon success. Other status codes upon failure.

FASTCV_API fcvStatus fcvMultiplyScalars16	(	const int16_t *__restrict	src,
		uint32_t	srcWidth,
		uint32_t	srcHeight,
		uint32_t	srcStride,
		int8_t	scalar,
		int8_t	shift,
		int16_t *__restrict	dst,
		uint32_t	dstStride
	)

Multiplies a scalar value to every element of a Matrix.

Multiplies a signed 8-bit scalar value to each element of the signed 16-bit source Matrix and stores the result of the multiplication in the corresponding element of the destination matrix.
NOTE: If the product of the scalar and matrix element exceeds the maximum value of a signed 16-bit integer, the result is clipped to this maximum value, while if the product goes below the minimum value of a signed 16-bit integer, it is clipped to this minimum value. The API can also handle fractional scalars. Use fixed point conversion and use the shift parameter to decide the number of bits by which the result is shifted. This affects the precision of the result. If you wish to perform pure integer multiplication, set the shift parameter to 0.

Parameters:

src	Input signed 16-bit integer matrix. Size of buffer is srcStride*srcHeight bytes. WARNING: should be 128-bit aligned.
srcWidth	Width of the Matrix.
srcHeight	Height of the Matrix.
srcStride	Stride of the Matrix in bytes. NOTE: if 0, srcStride is set as srcWidth. WARNING: should be multiple of 8, and at least as much as srcWidth if not 0.
scalar	The signed 8-bit integer scalar to be multiplied to the source matrix.
shift	The number of bits that the result has to be shifted by. Used to handle fractional scalar multiplication. If your input scalar is a pure integer, set shift to 0.
dst	Output signed 16-bit integer matrix. Size of buffer is dstStride*srcHeight bytes. WARNING: should be 128-bit aligned.
dstStride	Stride of the output matrix in bytes. NOTE: if 0, dstStride is set as srcWidth. WARNING: should be multiple of 8, and at least as much as srcWidth if not 0.

Returns:: FASTCV_SUCCESS upon success. Other status codes upon failure.

FASTCV_API fcvStatus fcvRotateImageInterleavedu8	(	const uint8_t *	src,
		uint32_t	srcWidth,
		uint32_t	srcHeight,
		uint32_t	srcStride,
		uint8_t *	dst,
		uint32_t	dstStride,
		fcvRotateDegree	degree
	)

Rotate one interleaved uint8_t type image (e.g. UV channel in NV21).

Parameters:

src	Source image. NOTE: array should be 128-bit aligned
srcWidth	Number of interleaved pairs in one row. For example, srcWidth = 4 in UVUVUVUV image row.
srcHeight	Height of the source image.
srcStride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 srcStride is default to 2*srcWidth. WARNING: should be multiple of 8
dst	the result image. NOTE: array should be 128-bit aligned
dstStride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 dstStride is default to 2*srcWidth (FASTCV_ROTATE_180) or 2*srcHeight (FASTCV_ROTATE_90 or FASTCV_ROTATE_270). WARNING: should be multiple of 8
degree	Rotate degree (FASTCV_ROTATE_90, FASTCV_ROTATE_180 or FASTCV_ROTATE_270).

FASTCV_API fcvStatus fcvRotateImageu8	(	const uint8_t *	src,
		uint32_t	srcWidth,
		uint32_t	srcHeight,
		uint32_t	srcStride,
		uint8_t *	dst,
		uint32_t	dstStride,
		fcvRotateDegree	degree
	)

Rotate one uint8_t type image.

Parameters:

src	Source image. NOTE: array should be 128-bit aligned
srcWidth	Width of the source image.
srcHeight	Height of the source image.
srcStride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 srcStride is default to srcWidth. WARNING: should be multiple of 8
dst	the result image. NOTE: array should be 128-bit aligned
dstStride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 dstStride is default to srcWidth (FASTCV_ROTATE_180) or srcHeight (FASTCV_ROTATE_90 or FASTCV_ROTATE_270). WARNING: should be multiple of 8
degree	Rotate degree (FASTCV_ROTATE_90, FASTCV_ROTATE_180 or FASTCV_ROTATE_270).

FASTCV_API void fcvSetElementsc3f32	(	float32_t *__restrict	dst,
		uint32_t	dstWidth,
		uint32_t	dstHeight,
		uint32_t	dstStride,
		float32_t	value1,
		float32_t	value2,
		float32_t	value3,
		const uint8_t *__restrict	mask,
		uint32_t	maskStride
	)

Sets every element of a float32_t 3-channel array to a given 3-element scalar.

A non-zero element of the mask array indicates that the corresponding element of the destination array is to be changed. If the mask pointer itself is zero (NULL), all elements of the dst array are changed. The mask is assumed to have the same width and height (in terms of pixels) as the destination array.

Parameters:

dst	The destination matrix
dstWidth	Destination matrix width
dstHeight	Destination matrix height
dstStride	Stride for the destination matrix, i.e. the gap (in terms of bytes) between the first element of a row and that of the successive row
value1	First float32_t value of the Scalar
value2	Second float32_t value of the Scalar
value3	Third float32_t value of the Scalar
mask	Operation mask, 8-bit single channel array; specifies elements of the dst array to be changed
maskStride	Stride for input mask, i.e. the gap (in terms of bytes) between the first element of a row and that of the successive row

Returns:: No return value

FASTCV_API void fcvSetElementsc3s32	(	int32_t *__restrict	dst,
		uint32_t	dstWidth,
		uint32_t	dstHeight,
		uint32_t	dstStride,
		int32_t	value1,
		int32_t	value2,
		int32_t	value3,
		const uint8_t *__restrict	mask,
		uint32_t	maskStride
	)

Sets every element of an int32_t 3-channel array to a given 3-element scalar.

A non-zero element of the mask array indicates that the corresponding element of the destination array is to be changed. If the mask pointer itself is zero (NULL), all elements of the dst array are changed. The mask is assumed to have the same width and height (in terms of pixels) as the destination array.

Parameters:

dst	The destination matrix
dstWidth	Destination matrix width
dstHeight	Destination matrix height
dstStride	Stride for the destination matrix, i.e. the gap (in terms of bytes) between the first element of a row and that of the successive row
value1	First int32_t value of the Scalar
value2	Second int32_t value of the Scalar
value3	Third int32_t value of the Scalar
mask	Operation mask, 8-bit single channel array; specifies elements of the dst array to be changed.
maskStride	Stride for input mask, i.e. the gap (in terms of bytes) between the first element of a row and that of the successive row

Returns:: No return value

FASTCV_API void fcvSetElementsc3u8	(	uint8_t *__restrict	dst,
		uint32_t	dstWidth,
		uint32_t	dstHeight,
		uint32_t	dstStride,
		uint8_t	value1,
		uint8_t	value2,
		uint8_t	value3,
		const uint8_t *__restrict	mask,
		uint32_t	maskStride
	)

Sets every element of a uint8_t 3-channel array to a given 3-element scalar.

A non-zero element of the mask array indicates that the corresponding element of the destination array is to be changed. If the mask pointer itself is zero (NULL), all elements of the dst array are changed. The mask is assumed to have the same width and height (in terms of pixels) as the destination array.

Parameters:

dst	The destination matrix
dstWidth	Destination matrix width
dstHeight	Destination matrix height
dstStride	Stride for the destination matrix, i.e. the gap (in terms of bytes) between the first element of a row and that of the successive row
value1	First uint8_t value of the Scalar
value2	Second uint8_t value of the Scalar
value3	Third uint8_t value of the Scalar
mask	Operation mask, 8-bit single channel array; specifies elements of the dst array to be changed
maskStride	Stride for input mask, i.e. the gap (in terms of bytes) between the first element of a row and that of the successive row

Returns:: No return value

FASTCV_API void fcvSetElementsc4f32	(	float32_t *__restrict	dst,
		uint32_t	dstWidth,
		uint32_t	dstHeight,
		uint32_t	dstStride,
		float32_t	value1,
		float32_t	value2,
		float32_t	value3,
		float32_t	value4,
		const uint8_t *__restrict	mask,
		uint32_t	maskStride
	)

Sets every element of a float32_t 4-channel array to a given 4-element scalar.

A non-zero element of the mask array indicates that the corresponding element of the destination array is to be changed. If the mask pointer itself is zero (NULL), all elements of the dst array are changed. The mask is assumed to have the same width and height (in terms of pixels) as the destination array.

Parameters:

dst	The destination matrix
dstWidth	Destination matrix width
dstHeight	Destination matrix height
dstStride	Stride for the destination matrix, i.e. the gap (in terms of bytes) between the first element of a row and that of the successive row
value1	First float32_t value of the Scalar
value2	Second float32_t value of the Scalar
value3	Third float32_t value of the Scalar
value4	Fourth float32_t value of the Scalar
mask	Operation mask, 8-bit single channel array; specifies elements of the dst array to be changed
maskStride	Stride for input mask, i.e. the gap (in terms of bytes) between the first element of a row and that of the successive row

Returns:: No return value

FASTCV_API void fcvSetElementsc4s32	(	int32_t *__restrict	dst,
		uint32_t	dstWidth,
		uint32_t	dstHeight,
		uint32_t	dstStride,
		int32_t	value1,
		int32_t	value2,
		int32_t	value3,
		int32_t	value4,
		const uint8_t *__restrict	mask,
		uint32_t	maskStride
	)

Sets every element of an int32_t 4-channel array to a given 4-element scalar.

A non-zero element of the mask array indicates that the corresponding element of the destination array is to be changed. If the mask pointer itself is zero (NULL), all elements of the dst array are changed. The mask is assumed to have the same width and height (in terms of pixels) as the destination array.

Parameters:

dst	The destination matrix
dstWidth	Destination matrix width
dstHeight	Destination matrix height
dstStride	Stride for the destination matrix, i.e. the gap (in terms of bytes) between the first element of a row and that of the successive row
value1	First int32_t value of the Scalar
value2	Second int32_t value of the Scalar
value3	Third int32_t value of the Scalar
value4	Fourth int32_t value of the Scalar
mask	Operation mask, 8-bit single channel array; specifies elements of the dst array to be changed.
maskStride	Stride for input mask, i.e. the gap (in terms of bytes) between the first element of a row and that of the successive row

Returns:: No return value

FASTCV_API void fcvSetElementsc4u8	(	uint8_t *__restrict	dst,
		uint32_t	dstWidth,
		uint32_t	dstHeight,
		uint32_t	dstStride,
		uint8_t	value1,
		uint8_t	value2,
		uint8_t	value3,
		uint8_t	value4,
		const uint8_t *__restrict	mask,
		uint32_t	maskStride
	)

Sets every element of a uint8_t 4-channel array to a given 4-element scalar.

A non-zero element of the mask array indicates that the corresponding element of the destination array is to be changed. If the mask pointer itself is zero (NULL), all elements of the dst array are changed. The mask is assumed to have the same width and height (in terms of pixels) as the destination array.

Parameters:

dst	The destination matrix
dstWidth	Destination matrix width
dstHeight	Destination matrix height
dstStride	Stride for the destination matrix, i.e. the gap (in terms of bytes) between the first element of a row and that of the successive row
value1	First uint8_t value of the Scalar
value2	Second uint8_t value of the Scalar
value3	Third uint8_t value of the Scalar
value4	Fourth uint8_t value of the Scalar
mask	Operation mask, 8-bit single channel array; specifies elements of the dst array to be changed
maskStride	Stride for input mask, i.e. the gap (in terms of bytes) between the first element of a row and that of the successive row

Returns:: No return value

FASTCV_API void fcvSetElementsf32	(	float32_t *__restrict	dst,
		uint32_t	dstWidth,
		uint32_t	dstHeight,
		uint32_t	dstStride,
		float32_t	value,
		const uint8_t *__restrict	mask,
		uint32_t	maskStride
	)

Sets every element of a float32_t single channel array to a given value.

A non-zero element of the mask array indicates that the corresponding element of the destination array is to be changed. If the mask pointer itself is zero (NULL), all elements of the dst array are changed. The mask is assumed to have the same width and height (in terms of pixels) as the destination array.

Parameters:

dst	The destination matrix
dstWidth	Destination matrix width
dstHeight	Destination matrix height
dstStride	Stride for the destination matrix, i.e. the gap (in terms of bytes) between the first element of a row and that of the successive row
value	the input float32_t value
mask	Operation mask, 8-bit single channel array; specifies elements of the dst array to be changed
maskStride	Stride for input mask, i.e. the gap (in terms of bytes) between the first element of a row and that of the successive row

Returns:: No return value

FASTCV_API void fcvSetElementss32	(	int32_t *__restrict	dst,
		uint32_t	dstWidth,
		uint32_t	dstHeight,
		uint32_t	dstStride,
		int32_t	value,
		const uint8_t *__restrict	mask,
		uint32_t	maskStride
	)

Sets every element of an int32_t single channel array to a given value.

A non-zero element of the mask array indicates that the corresponding element of the destination array is to be changed. If the mask pointer itself is zero (NULL), all elements of the dst array are changed. The mask is assumed to have the same width and height (in terms of pixels) as the destination array.

Parameters:

dst	The destination matrix
dstWidth	Destination matrix width
dstHeight	Destination matrix height
dstStride	Stride for the destination matrix, i.e. the gap (in terms of bytes) between the first element of a row and that of the successive row
value	the input int32_t value
mask	Operation mask, 8-bit single channel array; specifies elements of the dst array to be changed
maskStride	Stride for input mask, i.e. the gap (in terms of bytes) between the first element of a row and that of the successive row

Returns:: No return value

FASTCV_API void fcvSetElementsu8	(	uint8_t *__restrict	dst,
		uint32_t	dstWidth,
		uint32_t	dstHeight,
		uint32_t	dstStride,
		uint8_t	value,
		const uint8_t *__restrict	mask,
		uint32_t	maskStride
	)

Sets every element of a uint8_t single channel array to a given value.

A non-zero element of the mask array indicates that the corresponding element of the destination array is to be changed. If the mask pointer itself is zero (NULL), all elements of the dst array are changed. The mask is assumed to have the same width and height (in terms of pixels) as the destination array.

Parameters:

dst	The destination matrix
dstWidth	Destination matrix width
dstHeight	Destination matrix height
dstStride	Stride for the destination matrix, i.e. the gap (in terms of bytes) between the first element of a row and that of the successive row
value	the input uint8_t value
mask	Operation mask, 8-bit single channel array; specifies elements of the dst array to be changed.
maskStride	Stride for the mask, i.e. the gap (in terms of bytes) between the first element of a row and that of the successive row

Returns:: No return value

FASTCV_API int32_t fcvSolveCholeskyf32	(	float32_t *__restrict	A,
		const float32_t *__restrict	b,
		float32_t *__restrict	diag,
		uint32_t	N,
		float32_t *__restrict	x
	)

Executes Cholesky decomposition algorithm on a symmetric and positive definite matrix to solve the linear system A*x = b, where A is an NxN matrix and x & b are vectors of size N.

Parameters:

A	Pointer to the matrix A of size NxN. NOTE: This matrix WILL BE MODIFIED during computation. Please SAVE THE ORIGINAL MATRIX properly if necessary. NOTE: should be 128-bit aligned.
b	Pointer to the vector b of size N. NOTE: should be 128-bit aligned.
diag	Pointer to the buffer for the diagonal of matrix A. This buffer is used for computation. NOTE: should be 128-bit aligned.
N	Size of matrix and vectors.
x	Pointer to the output vector x of size N. NOTE: should be 128-bit aligned.

Returns:: 1 if the linear system could be solved or 0 otherwise.

FASTCV_API void fcvSolvef32	(	const float32_t *__restrict	A,
		int32_t	numCols,
		int32_t	numRows,
		const float32_t *__restrict	b,
		float32_t *__restrict	x
	)

Solve linear equation system Ax = b.

Parameters:

A	The matrix contains coefficients of the linear equation system
numRows	The number of rows for the matrix A
numCols	The number of columns for the matrix A
b	The right side value
x	The solution vector

Returns:: No return value

FASTCV_API int32_t fcvSolveLDLf32	(	float32_t *__restrict	A,
		const float32_t *__restrict	b,
		float32_t *__restrict	diag,
		uint32_t	N,
		float32_t *__restrict	x
	)

Executes LDL decomposition algorithm on a symmetric and positive definite matrix to solve the linear system A*x = b, where A is an NxN matrix and x & b are vectors of size N.

Parameters:

A	Pointer to the matrix A of size NxN. NOTE: This matrix WILL BE MODIFIED during computation. Please SAVE THE ORIGINAL MATRIX properly if necessary. NOTE: should be 128-bit aligned.
b	Pointer to the vector b of size N. NOTE: should be 128-bit aligned.
diag	Pointer to the buffer for the diagonal of matrix A. This buffer is used for computation. NOTE: should be 128-bit aligned.
N	Size of matrix and vectors.
x	Pointer to the output vector x of size N. NOTE: should be 128-bit aligned.

Returns:: 1 if the linear system could be solved or 0 otherwise.

FASTCV_API fcvStatus fcvSolveLUf32	(	float32_t *__restrict	A,
		float32_t *__restrict	b,
		uint32_t	N,
		uint8_t *__restrict	pivot,
		float32_t *__restrict	x
	)

Solves a Linear System of Equations using LU-Decomposition.

Solves a Linear System of Equations using LU Decomposition. Given a system defined by A x = b, the function decomposes A into a lower triangular matrix L and an upper triangular matrix U. It then computes x by first solving L y = b by forward substitution for y, and then solving the system of linear equations U x = y by backward substitution for x.
NOTE: Pivoting is used here to ensure that any non-singular matrix can be solved. This is because not all matrices have an LU decomposition. Pivoting helps to overcome this issue.

Parameters:

A	Input coefficient matrix of the linear system of dimension N x N. WARNING: must be square.
b	Component vector of the linear system of dimension N x 1
N	Dimension of the input Matrix A and component vector b
pivot	An N x 1 pivot array which is populated as follows: for each k = 0, 1, ..., N-1, the ith element of pivot contains the row interchanged with row i when k = i. Pivoting is used both for numerical stability and to ensure that an LU factorization exists for the input matrix A.
x	The solution of the linear system, an N x 1 Vector.

Returns:: FASTCV_SUCCESS upon success. Other status codes upon failure.

FASTCV_API void fcvSort8Scoresf32	(	float *__restrict	inScores,
		float *__restrict	outScores
	)

Sorting of 8 float numbers.

Perform sorting of 8 scores in ascending order (output of SumOfSquaredDiffs)

Parameters:

inScores	Input 8 element float array NOTE: array should be 128-bit aligned
outScores	Output is 8 element sorted float array WARNING: array should be 128-bit aligned

FASTCV_API fcvStatus fcvSubtracts16	(	const int16_t *	src1,
		uint32_t	width,
		uint32_t	height,
		uint32_t	src1Stride,
		const int16_t *__restrict	src2,
		uint32_t	src2Stride,
		fcvConvertPolicy	policy,
		int16_t *	dst,
		uint32_t	dstStride
	)

Matrix subtraction of two int16_t type matrices.

Parameters:

src1	First source matrix. NOTE: array should be 128-bit aligned
width	Width of the source matrix.
height	Height of the source matrix.
src1Stride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 src1Stride is default to width*2. WARNING: should be multiple of 8
src2	Second source matrix. NOTE: array should be 128-bit aligned
src2Stride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 src2Stride is default to width*2. WARNING: should be multiple of 8
policy	Conversion policy that decides how data overflow should be handled
dst	the result matrix (int16_t type). NOTE: array should be 128-bit aligned
dstStride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 dstStride is default to width*2. WARNING: should be multiple of 8

Returns:: FASTCV_SUCCESS upon success. Other status codes upon failure.

FASTCV_API fcvStatus fcvSubtractu8	(	const uint8_t *	src1,
		uint32_t	width,
		uint32_t	height,
		uint32_t	src1Stride,
		const uint8_t *__restrict	src2,
		uint32_t	src2Stride,
		fcvConvertPolicy	policy,
		uint8_t *	dst,
		uint32_t	dstStride
	)

Matrix subtraction of two uint8_t type matrices.

Parameters:

src1	First source matrix. NOTE: array should be 128-bit aligned
width	Width of the source matrix.
height	Height of the source matrix.
src1Stride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 src1Stride is default to width. WARNING: should be multiple of 8
src2	Second source matrix. NOTE: array should be 128-bit aligned
src2Stride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 src2Stride is default to width. WARNING: should be multiple of 8
policy	Conversion policy that decides how data overflow should be handled
dst	the result matrix (uint8_t type). NOTE: array should be 128-bit aligned
dstStride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 dstStride is default to width. WARNING: should be multiple of 8

Returns:: FASTCV_SUCCESS upon success. Other status codes upon failure.

FASTCV_API fcvStatus fcvSubtractu8s16	(	const uint8_t *__restrict	src1,
		uint32_t	width,
		uint32_t	height,
		uint32_t	src1Stride,
		const uint8_t *__restrict	src2,
		uint32_t	src2Stride,
		int16_t *__restrict	dst,
		uint32_t	dstStride
	)

Matrix subtraction of two uint8_t type matrices.

Parameters:

src1	First source matrix. NOTE: array should be 128-bit aligned
width	Width of the source matrix.
height	Height of the source matrix.
src1Stride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 src1Stride is default to width. WARNING: should be multiple of 8
src2	Second source matrix. NOTE: array should be 128-bit aligned
src2Stride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 src2Stride is default to width. WARNING: should be multiple of 8
dst	the result matrix (int16_t type). NOTE: array should be 128-bit aligned
dstStride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 dstStride is default to width*2. WARNING: should be multiple of 8

Returns:: FASTCV_SUCCESS upon success. Other status codes upon failure.

FASTCV_API void fcvSumOfSquaredDiffs36x4s8	(	const int8_t *__restrict	a,
		float	invLenA,
		const int8_t *__restrict	b0,
		const int8_t *__restrict	b1,
		const int8_t *__restrict	b2,
		const int8_t *__restrict	b3,
		const float *__restrict	invLenB,
		float *__restrict	distances
	)

Sum of squared differences of one 36-byte vector against 4 others.

SSD of one vector (a) against 4 others (b0,b1,b2,b3) using their given inverse lengths for normalization.

SSD(a,b0), SSD(a,b1), SSD(a,b2), SSD(a,b3)

Parameters:

a	Vector. NOTE: array should be 128-bit aligned
invLenA	Inverse of vector A = 1/|A|
b0	Vector. NOTE: array should be 128-bit aligned
b1	Vector. NOTE: array should be 128-bit aligned
b2	Vector. NOTE: array should be 128-bit aligned
b3	Vector. NOTE: array should be 128-bit aligned
invLenB	Inverse of vectors b0...b3 = 1/|b0|,... 1/|b3| WARNING: array should be 128-bit aligned
distances	Output of the 4 results { SSD(a,b0), SSD(a,b1), SSD(a,b2), SSD(a,b3) }. ACCURACY: 1.0e-6 WARNING: array should be 128-bit aligned

FASTCV_API void fcvSumOfSquaredDiffs36xNs8	(	const int8_t *__restrict	a,
		float	invLenA,
		const int8_t *const *__restrict	b,
		const float *__restrict	invLenB,
		unsigned int	numB,
		float *__restrict	distances
	)

Sum of squared differences of one 36-byte vector against N others.

SSD of one vector (a) against N other 36-byte vectors ( b[0], b[1], ..., b[n-1] ) using their given inverse lengths for normalization.

SSD(a,b[0]), SSD(a,b[1]), ..., SSD(a,b[n-1])

Parameters:

a	Vector. NOTE: array should be 128-bit aligned
invLenA	Inverse of vector A = 1/|A|
b	Vectors b[0]...b[n-1]. WARNING: should be 128-bit aligned.
invLenB	Inverse of vectors b[0]...b[n-1] = 1/|b[0]|,... 1/|b[n-1]| WARNING: should be 128-bit aligned.
numB	Number of B vectors.
distances	Output of the N results { SSD(a,b[0]), SSD(a,b[1]), ..., SSD(a,b[n-1]) }. ACCURACY: 1.0e-6 WARNING: should be 128-bit aligned.

FASTCV_API void fcvSumOfSquaredDiffsf32	(	const float32_t *__restrict	a,
		float32_t	invLenA,
		uint32_t	dim,
		const float32_t *const *__restrict	bList,
		const float32_t *__restrict	invLenB,
		uint32_t	numB,
		float32_t *__restrict	distances
	)

Sum of squared differences of one floating vector of L-elements against N others.

SSD of one vector (a) against N other L-elements vectors ( b[0], b[1], ..., b[n-1] ) using their given inverse lengths for normalization.

SSD(a,b[0]), SSD(a,b[1]), ..., SSD(a,b[n-1])

Parameters:

a	Vector. NOTE: array should be 128-bit aligned
invLenA	Inverse of vector A = 1/|A|
dim	Number of elements of vector A
bList	Vectors b[0]...b[n-1]. WARNING: should be 128-bit aligned.
invLenB	Inverse of vectors b[0]...b[n-1] = 1/|b[0]|,... 1/|b[n-1]| WARNING: should be 128-bit aligned.
numB	Number of B vectors.
distances	Output of the N results { SSD(a,b[0]), SSD(a,b[1]), ..., SSD(a,b[n-1]) } WARNING: should be 128-bit aligned.

FASTCV_API void fcvSumOfSquaredDiffss8	(	const int8_t *__restrict	a,
		float32_t	invLenA,
		uint32_t	dim,
		const int8_t *const *__restrict	bList,
		const float32_t *__restrict	invLenB,
		uint32_t	numB,
		float32_t *__restrict	distances
	)

Sum of squared differences of one L-byte vector against N others.

SSD of one vector (a) against N other L-byte vectors ( b[0], b[1], ..., b[n-1] ) using their given inverse lengths for normalization.

SSD(a,b[0]), SSD(a,b[1]), ..., SSD(a,b[n-1])

Parameters:

a	Vector. NOTE: array should be 128-bit aligned
invLenA	Inverse of vector A = 1/|A|
dim	Number of elements of vector A
bList	Vectors b[0]...b[n-1]. WARNING: should be 128-bit aligned.
invLenB	Inverse of vectors b[0]...b[n-1] = 1/|b[0]|,... 1/|b[n-1]| WARNING: should be 128-bit aligned.
numB	Number of B vectors.
distances	Output of the N results { SSD(a,b[0]), SSD(a,b[1]), ..., SSD(a,b[n-1]) }. WARNING: should be 128-bit aligned.

FASTCV_API void fcvSumOfSquaredDiffsu8	(	const uint8_t *__restrict	a,
		float32_t	invLenA,
		uint32_t	dim,
		const uint8_t *const *__restrict	bList,
		const float32_t *__restrict	invLenB,
		uint32_t	numB,
		float32_t *__restrict	distances
	)

Sum of squared differences of one L-byte vector against N others.

SSD of one vector (a) against N other L-byte vectors ( b[0], b[1], ..., b[n-1] ) using their given inverse lengths for normalization.

SSD(a,b[0]), SSD(a,b[1]), ..., SSD(a,b[n-1])

Parameters:

a	Vector. NOTE: array should be 128-bit aligned
invLenA	Inverse of vector A = 1/|A|
dim	Number of elements of vector A
bList	Vectors b[0]...b[n-1]. WARNING: should be 128-bit aligned.
invLenB	Inverse of vectors b[0]...b[n-1] = 1/|b[0]|,... 1/|b[n-1]| WARNING: should be 128-bit aligned.
numB	Number of B vectors.
distances	Output of the N results { SSD(a,b[0]), SSD(a,b[1]), ..., SSD(a,b[n-1]) } WARNING: should be 128-bit aligned.

FASTCV_API void fcvSVDf32	(	const float32_t *__restrict	A,
		uint32_t	m,
		uint32_t	n,
		float32_t *__restrict	w,
		float32_t *__restrict	U,
		float32_t *__restrict	Vt,
		float32_t *	tmpU,
		float32_t *	tmpV
	)

Compute a singular value decomposition of a matrix of a float type A = U*diag[w]*Vt; It is used for solving problems like least-squares, under-determined linear systems, matrix inversion and so forth. The algorithm used here does not compute the full U and V matrices however it computes a condensed version of U and V described below which is sufficient to solve most problems which use SVD.

Parameters:

A	The input matrix of dimensions m x n NOTE: should be 128-bit aligned.
m	The number of rows of matrix A
n	The number of columns of matrix A
w	The pointer to the buffer that holds n singular values. When m>n it contains n singular values while when m<n, only the first m singular values are of any significance. However, during allocation, it should be allocated as a buffer to hold n floats. NOTE: should be 128-bit aligned.
U	The U matrix whose dimension is m x min(m,n). This is not the full size U matrix obtained from the conventional SVD algorithm but is sufficient for solving problems like least-squares, under-determined linear systems, matrix inversion and so forth. While allocating, allocate as a matrix of m x n floats. NOTE: should be 128-bit aligned.
Vt	The V matrix whose dimension is n x min(m,n). This is not the full size V matrix obtained from the conventional SVD algorithm but is sufficient for solving problems like least-squares, under-determined linear systems, matrix inversion and so forth. While allocating, allocate as a matrix of n x n floats. NOTE: should be 128-bit aligned.
tmpU	Temporary buffer used in processing. It must be allocated as an array of size m x n NOTE: should be 128-bit aligned.
tmpV	Temporary buffer used in processing. It must be allocated as an array of size n x n NOTE: should be 128-bit aligned.

Returns:: No return value

FASTCV_API void fcvTransposef32	(	const float32_t *__restrict	src,
		uint32_t	srcWidth,
		uint32_t	srcHeight,
		uint32_t	srcStride,
		float32_t *__restrict	dst,
		uint32_t	dstStride
	)

Matrix transpose of one float32_t type matrix.

Parameters:

src	Source matrix. The size of src is srcStride*srcHeight. NOTE: array should be 128-bit aligned
srcWidth	Width of the source matrix.
srcHeight	Height of the source matrix.
srcStride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 srcStride is default to srcWidth*4. WARNING: should be multiple of 8
dst	Transpose of the source matrix. The size of dst is dstStride*srcWidth. NOTE: array should be 128-bit aligned
dstStride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 dstStride is default to srcHeight*4. WARNING: should be multiple of 8

FASTCV_API fcvStatus fcvTransposeRGB888u8	(	const uint8_t *__restrict	src,
		uint32_t	srcWidth,
		uint32_t	srcHeight,
		uint32_t	srcStride,
		uint8_t *__restrict	dst,
		uint32_t	dstStride
	)

Transposes an interleaved RGB image.

Computes the transpose of an interleaved RGB image src, and stores the result in dst

Parameters:

src	Input unsigned 8-bit integer image. Size of buffer is srcStride*srcHeight bytes. WARNING: should be 128-bit aligned.
srcWidth	Width of the Image.
srcHeight	Height of the Image.
srcStride	Stride of the Image in bytes. NOTE: if 0, srcStride is set as 3 x srcWidth. WARNING: should be multiple of 8, and at least as much as 3 x srcWidth if not 0.
dst	Output unsigned 8-bit integer image. Size of buffer is dstStride*srcWidth bytes. WARNING: should be 128-bit aligned.
dstStride	Stride of the output image in bytes. NOTE: if 0, dstStride is set as 3 x srcHeight. WARNING: should be multiple of 8, and at least as much as 3 x srcHeight if not 0.

Returns:: FASTCV_SUCCESS upon success. Other status codes upon failure.

FASTCV_API void fcvTransposeu16	(	const uint16_t *__restrict	src,
		uint32_t	srcWidth,
		uint32_t	srcHeight,
		uint32_t	srcStride,
		uint16_t *__restrict	dst,
		uint32_t	dstStride
	)

Matrix transpose of one uint16_t type matrix.

Parameters:

src	Source matrix. The size of src is srcStride * srcHeight. NOTE: array should be 128-bit aligned
srcWidth	Width of the source matrix.
srcHeight	Height of the source matrix.
srcStride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 srcStride is default to srcWidth * 2. WARNING: should be multiple of 8
dst	Transpose of the source matrix. The size of dst is dstStride * srcWidth. NOTE: array should be 128-bit aligned
dstStride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 dstStride is default to srcHeight * 2. WARNING: should be multiple of 8

FASTCV_API void fcvTransposeu8	(	const uint8_t *__restrict	src,
		uint32_t	srcWidth,
		uint32_t	srcHeight,
		uint32_t	srcStride,
		uint8_t *__restrict	dst,
		uint32_t	dstStride
	)

Matrix transpose of one uint8_t type matrix.

Parameters:

src	Source matrix. The size of src is srcStride * srcHeight. NOTE: array should be 128-bit aligned
srcWidth	Width of the source matrix.
srcHeight	Height of the source matrix.
srcStride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 srcStride is default to srcWidth. WARNING: should be multiple of 8
dst	Transpose of the source matrix. The size of dst is dstStride * srcWidth. NOTE: array should be 128-bit aligned
dstStride	Stride is the number of bytes between column 0 of row 1 and column 0 of row 2 in data memory. If left at 0 dstStride is default to srcHeight. WARNING: should be multiple of 8

FASTCV_API int fcvVecNormalize36s8f32	(	const int8_t *__restrict	src,
		unsigned int	srcStride,
		const float *__restrict	invLen,
		unsigned int	numVecs,
		float	reqNorm,
		float *__restrict	dst,
		int32_t *	stopBuild
	)

Translate to float and normalize 36 8-bit elements.

Parameters:

src	Pointer to the first input vector
invLen	Pointer to inverse length of the first input vector located right after each 36 element vector
numVecs	Number of vectors to translate
reqNorm	Required norm
srcStride	Step in bytes to data of the next vector. Each vector has 36 8-bit elements and 1 float invLen.
dst	Pointer to contiguous block for output vectors. WARNING: should be 128-bit aligned.
stopBuild	Allows other threads to break this function in the middle of processing. When set to 1, the function will exit on the next iteration.

Returns: 0 - success; EFAULT - invalid address; EINVAL - invalid argument.

Functions

Detailed Description

Function Documentation