Intel® Math Kernel Library 11.3 Update 4 Developer Guide
The following are examples of Compiler Assisted Offload. See the Intel® Compiler User and Reference Guide for more details.
These examples show how to call Intel MKL from offload regions that execute on coprocessors based on the Intel® MIC Architecture, and how to reuse data that already resides in coprocessor memory to minimize data transfer.
! Fortran example, first offload.
! Upload A and B to the coprocessor and do not deallocate them after the
! offload directive. C is uploaded and downloaded back, and its allocated
! memory on the coprocessor is retained as well.
!   IN / INOUT         - copy to the card / copy to the card and back
!   ALLOC_IF(.TRUE.)   - allocate the buffer on the card for this offload
!   FREE_IF(.FALSE.)   - keep the buffer allocated after the offload
! The offloaded call computes C := 1.0*A*B + 1.0*C on N-by-N matrices.
!DEC$ ATTRIBUTES OFFLOAD : MIC :: SGEMM
!DEC$ OFFLOAD TARGET( MIC:0 ) IN( N ), &
!DEC$ IN( A: LENGTH( N * N ) ALLOC_IF(.TRUE.) FREE_IF(.FALSE.)), &
!DEC$ IN( B: LENGTH( N * N ) ALLOC_IF(.TRUE.) FREE_IF(.FALSE.)), &
!DEC$ INOUT( C: LENGTH( N * N ) ALLOC_IF(.TRUE.) FREE_IF(.FALSE.))
CALL SGEMM( 'N', 'N', N, N, N, 1.0, A, N, B, N, 1.0, C, N )
! Change C here
! Fortran example, second offload.
! Reuse A and B already on the coprocessor, and upload the new C. Free all
! the memory on the card when this offload completes.
!   NOCOPY             - no transfer; use the data retained on the card
!   ALLOC_IF(.FALSE.)  - do not allocate; reuse the retained buffers
!   FREE_IF(.TRUE.)    - release the buffers after the offload
! The offloaded call computes C := 1.0*A*B - 1.0*C on N-by-N matrices.
!DEC$ ATTRIBUTES OFFLOAD : MIC :: SGEMM
!DEC$ OFFLOAD TARGET( MIC:0 ) IN( N ), &
!DEC$ NOCOPY( A: LENGTH( N * N ) ALLOC_IF(.FALSE.) FREE_IF(.TRUE.)), &
!DEC$ NOCOPY( B: LENGTH( N * N ) ALLOC_IF(.FALSE.) FREE_IF(.TRUE.)), &
!DEC$ INOUT( C: LENGTH( N * N ) ALLOC_IF(.FALSE.) FREE_IF(.TRUE.))
CALL SGEMM( 'N', 'N', N, N, N, 1.0, A, N, B, N, -1.0, C, N )
/*
 * First offload: copy A and B to the coprocessor and leave their buffers
 * allocated once the offload region ends. C is sent to the card and brought
 * back afterwards; its buffer on the card is kept as well.
 *
 *   alloc_if(1) - allocate the buffer on the card for this offload
 *   free_if(0)  - do not release the buffer when the offload finishes
 */
#pragma offload target(mic:0) \
    in(A: length(matrix_elements) alloc_if(1) free_if(0)) \
    in(B: length(matrix_elements) alloc_if(1) free_if(0)) \
    in(transa, transb, N, alpha, beta) \
    inout(C: length(matrix_elements) alloc_if(1) free_if(0))
{
    /* Executed on the coprocessor; all dimensions and leading dimensions are N. */
    sgemm(&transa, &transb,
          &N, &N, &N,
          &alpha, A, &N, B, &N,
          &beta, C, &N);
}
/* Change C here */

/*
 * Second offload: A and B are not transferred again; the copies already on
 * the coprocessor are reused. Only the updated C is uploaded (and downloaded
 * after the call). Every buffer on the card is released at the end.
 *
 *   nocopy      - skip the transfer and use the data retained on the card
 *   alloc_if(0) - do not allocate; reuse the retained buffer
 *   free_if(1)  - release the buffer when the offload finishes
 */
#pragma offload target(mic:0) \
    nocopy(A: length(matrix_elements) alloc_if(0) free_if(1)) \
    nocopy(B: length(matrix_elements) alloc_if(0) free_if(1)) \
    in(transa, transb, N, alpha, beta) \
    inout(C: length(matrix_elements) alloc_if(0) free_if(1))
{
    /* Executed on the coprocessor; all dimensions and leading dimensions are N. */
    sgemm(&transa, &transb,
          &N, &N, &N,
          &alpha, A, &N, B, &N,
          &beta, C, &N);
}