Intel® C++ Compiler 16.0 User and Reference Guide

distribute_point

Instructs the compiler to prefer loop distribution at the location indicated.

Syntax

#pragma distribute_point

Arguments

None

Description

The distribute_point pragma suggests that the compiler split a large loop into smaller ones. This is particularly useful when optimizations such as vectorization cannot take place because of excessive register usage.

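The following rules apply to this pragma:

- When the pragma is placed outside the loop (immediately before it), the compiler decides where to distribute the loop, and data dependencies are observed.
- When the pragma is placed inside the loop, distribution starts at the location of the pragma, and loop-carried dependencies are ignored.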

Example: Using the distribute_point pragma outside the loop

#define NUM 1024

// Adds the element index to every entry of six NUM-element arrays using a
// single loop. The distribute_point pragma is placed outside the loop,
// directly in front of it.
void loop_distribution_pragma1(
       double a[NUM], double b[NUM], double c[NUM],
       double x[NUM], double y[NUM], double z[NUM] ) {
  int idx;

  // The loop as written, before any distribution (splitting) is applied.
  #pragma distribute_point
  for (idx = 0; idx < NUM; ++idx) {
    a[idx] += idx;
    b[idx] += idx;
    c[idx] += idx;
    x[idx] += idx;
    y[idx] += idx;
    z[idx] += idx;
  }
}

Example: Using the distribute_point pragma inside the loop

#define NUM 1024

// Adds the element index to every entry of six NUM-element arrays using a
// single loop. The distribute_point pragma is placed inside the loop body,
// between the updates of a/b/c and the updates of x/y/z.
void loop_distribution_pragma2(
       double a[NUM], double b[NUM], double c[NUM],
       double x[NUM], double y[NUM], double z[NUM] ) {
  int idx;

  for (idx = 0; idx < NUM; ++idx) {
    a[idx] += idx;
    b[idx] += idx;
    c[idx] += idx;
    // Requested distribution (split) point of the loop.
    #pragma distribute_point
    x[idx] += idx;
    y[idx] += idx;
    z[idx] += idx;
  }
}

Example: Using the distribute_point pragma inside and outside the loop

// For indices 1..999: b = a + 1, c = a + b, d = c + 1 (element-wise).
// The pragma is placed outside the loop, so the compiler automatically
// decides where to distribute; data dependency is observed.
void dist1(int a[], int b[], int c[], int d[]) {
  #pragma distribute_point
  for (int idx = 1; idx < 1000; ++idx) {
    b[idx] = 1 + a[idx];
    c[idx] = b[idx] + a[idx];
    d[idx] = 1 + c[idx];
  }
}
 
// For indices 1..999: b = a + 1, c = a + b, d = c + 1 (element-wise).
// The pragma is placed inside the loop, after the store to b.
void dist2(int a[], int b[], int c[], int d[]) {
  for (int idx = 1; idx < 1000; ++idx) {
    b[idx] = 1 + a[idx];

    // Distribution starts at this point; all loop-carried
    // dependency is ignored.
    #pragma distribute_point
    c[idx] = b[idx] + a[idx];
    d[idx] = 1 + c[idx];
  }
}