Intel® C++ Compiler 16.0 User and Reference Guide
Example 4 demonstrates a linearized 2D stencil that uses embedded offsets and calls methods on the primitive.
#include <sdlt/sdlt.h>

// Typical C++ object to represent a pixel in an image.
// Holds three float color channels and provides the arithmetic needed by
// the averaging filter below (component-wise add, scale by a scalar).
struct RGBs
{
    float red;
    float green;
    float blue;

    // Default constructor deliberately leaves the channels uninitialized;
    // every instance used below is filled from a container or an operator.
    RGBs() {}

    // Member-wise copy.
    RGBs(const RGBs &iOther)
    : red(iOther.red)
    , green(iOther.green)
    , blue(iOther.blue)
    {
    }

    // Member-wise assignment.
    RGBs & operator =(const RGBs &iOther)
    {
        red = iOther.red;
        green = iOther.green;
        blue = iOther.blue;
        return *this;
    }

    // Returns the component-wise sum of this color and iOther.
    RGBs operator + (const RGBs &iOther) const
    {
        RGBs sum;
        sum.red = red + iOther.red;
        sum.green = green + iOther.green;
        sum.blue = blue + iOther.blue;
        return sum;
    }

    // Returns this color with every channel multiplied by iScalar.
    RGBs operator * (float iScalar) const
    {
        RGBs scaledColor;
        scaledColor.red = red * iScalar;
        scaledColor.green = green * iScalar;
        scaledColor.blue = blue * iScalar;
        return scaledColor;
    }
};

// Registers RGBs and its data members with SDLT so it can be stored in
// SDLT containers.
SDLT_PRIMITIVE(RGBs, red, green, blue)

// Number of padding pixels added on each side of the image.
const int StencilHaloSize = 1;
const int width = 1920;
const int height = 1080;

// Placeholders standing in for real image I/O; they accept an accessor and
// do nothing.
template<typename AccessorT> void loadImageStub(AccessorT) {}
template<typename AccessorT> void saveImageStub(AccessorT) {}

// Performs average color filtering with neighbors left, right, above, below.
// Standard C++ requires main to return int (the original used void).
int main()
{
    // We are padding +-1 so we can avoid boundary conditions
    const int paddedWidth = width + 2 * StencilHaloSize;
    const int paddedHeight = height + 2 * StencilHaloSize;
    const int elementCount = paddedWidth*paddedHeight;

    // The 2D image is linearized: pixel (x, y) lives at index y*paddedWidth + x.
    sdlt::soa1d_container<RGBs> inputImage(elementCount);
    sdlt::soa1d_container<RGBs> outputImage(elementCount);

    loadImageStub(inputImage.access());

    // NOTE(review): SDLT_INLINE_BLOCK is an SDLT-provided macro applied to the
    // block that follows; presumably it requests aggressive inlining of the
    // accessor and RGBs calls inside the SIMD loop - confirm against SDLT docs.
    SDLT_INLINE_BLOCK
    {
        const int endOfY = StencilHaloSize + height;
        const int endOfX = StencilHaloSize + width;
        for (int y = StencilHaloSize; y < endOfY; ++y)
        {
            // Embed offsets into the accessors to get to the correct row
            auto prevRow = inputImage.const_access((y - 1)*paddedWidth);
            auto curRow = inputImage.const_access(y*paddedWidth);
            auto nextRow = inputImage.const_access((y + 1)*paddedWidth);
            auto outputRow = outputImage.access(y*paddedWidth);

            #pragma omp simd
            for (int ix = StencilHaloSize; ix < endOfX; ++ix)
            {
                // Wrap the loop counter so the accessors are indexed with
                // SDLT's index type (x - 1 / x + 1 select the neighbors).
                sdlt::linear_index x(ix);

                const RGBs color1 = curRow[x - 1];
                const RGBs color2 = curRow[x];
                const RGBs color3 = curRow[x + 1];
                const RGBs color4 = prevRow[x];
                const RGBs color5 = nextRow[x];

                // Despite looking like AOS code, compiler is able to create
                // privatized instances and call inlinable methods on the objects
                // keeping the algorithm at very high level
                const RGBs sumOfColors = color1 + color2 + color3 + color4 + color5;
                const RGBs averageColor = sumOfColors*(1.0f / 5.0f);

                outputRow[x] = averageColor;
            }
        }
    }

    saveImageStub(outputImage.access());
    return 0;
}