Intel® C++ Compiler 16.0 User and Reference Guide

GAP Message (Diagnostic ID 30538)

Message

Moving the block of code that consists of a function-call (line %d), if-condition (line %d), and an early return (line %d) to outside the loop may enable parallelization of the loop at line %d.

Advice

Move the function call and an associated return from inside the loop (perhaps by inserting them before the loop) to help parallelize the loop.

A function call that leads to an early return inside a loop usually handles an error condition. If this error check can be done before the loop starts executing without changing the program semantics, the compiler may be able to parallelize the loop, thus improving performance.

Example

Consider the following:

extern int num_nodes;

/* One record holding the coordinates of a pair of cities. */
typedef struct TEST_STRUCT {
    // Coordinates of city1
    float latitude1;
    float longitude1;

    // Coordinates of city2
    float latitude2;
    float longitude2;
} test_struct;

extern int *mark_larger;
extern float *distances, **matrix;
extern test_struct **nodes;
extern test_struct ***files;
extern void init_node(test_struct *node, int i);
/* Prototype matches the definition in this listing, which takes `width`;
   a (void) prototype here conflicts with that definition when built as C. */
extern void process_nodes(int width);
float compute_max_distance(void);

extern int check_error_condition(int width);
 
#include <math.h> 
#include <stdio.h>
 
// "Before" version: for each of the num_nodes records in files[0], computes
// the distance between the record's two cities, fills matrix[k][0..width-1],
// and conditionally updates distances[k].
//
// check_error_condition(width) is called at the top of every iteration and an
// early return abandons the remaining iterations. This call / if / return
// sequence inside the loop is what diagnostic 30538 reports.
void process_nodes(int width) 
{
  // Sphere radius used to scale the angle into a distance
  // (presumably the Earth's radius in miles -- confirm).
  float const R = 3964.0;
  float temp, lat1, lat2, long1, long2, result, pat2;
  int m, j, temp1 = num_nodes;
 
      // Side effect: the global `nodes` is redirected to the first entry of `files`.
      nodes = files[0];
      // `m` is assigned here but never read in this function.
      m = 1;
 
#pragma loop count min(4) 
#pragma parallel
      for (int k=0; k < temp1; k++) {
 
	        // Error check repeated on every iteration; a nonzero result
	        // returns from the function from inside the loop.
	        if (check_error_condition(width)) {
	            return;
	        }
 
         lat1 = nodes[k]->latitude1;
	        lat2 = nodes[k]->latitude2;
 
	        long1 = nodes[k]->longitude1;
	        long2 = nodes[k]->longitude2;
 
	        // Compute the distance between the two cities.
	        // `temp` has the form of the spherical law of cosines (cosine of
	        // the angle between the two points); coordinates are passed
	        // straight to sin()/cos(), so they are expected in radians.
	        temp = sin(lat1) * sin(lat2) + cos(lat1) * cos(lat2) * 
	                                              cos(long1-long2);
	        // Convert that cosine to an angle via the half-angle tangent
	        // form and scale by R.
	        result = 2.0 * R * atan(sqrt((1.0-temp)/(1.0+temp)));
 
	        // pat2 accumulates distances[0..width-1]; row k of matrix is
	        // filled with distances[k] + column index.
	        // NOTE(review): this reads distances[j] for all j < width while
	        // other iterations may write distances[k] below, so the values
	        // seen can depend on iteration order -- confirm this is
	        // acceptable before relying on parallel execution.
	        pat2 = 0;
	        for(j=0; j<width; j++) {
	          pat2 += distances[j];
	          matrix[k][j] = distances[k]+j;
	        }
	        // Overwrite distances[k] with result + pat2, but only when the
	        // newly computed distance exceeds the stored value.
	        if (result > distances[k]) {
	            distances[k] = result + pat2;
	        }
      } 
}

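In this case, the compiler is unable to parallelize the loop at line 38 (the for loop over k), because of the call to check_error_condition() and the early return inside the loop body.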

If you determine it is safe to do so, you can modify the above code as follows:

extern int num_nodes;

/* One record holding the coordinates of a pair of cities. */
typedef struct TEST_STRUCT {
    // Coordinates of city1
    float latitude1;
    float longitude1;

    // Coordinates of city2
    float latitude2;
    float longitude2;
} test_struct;

extern int *mark_larger;
extern float *distances, **matrix;
extern test_struct **nodes;
extern test_struct ***files;
extern void init_node(test_struct *node, int i);
/* Prototype matches the definition in this listing, which takes `width`;
   a (void) prototype here conflicts with that definition when built as C. */
extern void process_nodes(int width);
float compute_max_distance(void);

extern int check_error_condition(int width);
 
#include <math.h> 
#include <stdio.h>
 
// "After" version: same per-record computation as the original, but the
// error check is performed once, before the loop, so the loop body no longer
// contains a function call that leads to an early return.
//
// For each of the num_nodes records in files[0], computes the distance
// between the record's two cities, fills matrix[k][0..width-1], and
// conditionally updates distances[k].
void process_nodes(int width) {
  // Sphere radius used to scale the angle into a distance
  // (presumably the Earth's radius in miles -- confirm).
  float const R = 3964.0;
  float temp, lat1, lat2, long1, long2, result, pat2;
  int m, j, temp1 = num_nodes;
 
      // Side effect: the global `nodes` is redirected to the first entry of `files`.
      nodes = files[0];
      // `m` is assigned here but never read in this function.
      m = 1;
 
      // Error check hoisted out of the loop: it now runs exactly once,
      // even when num_nodes is zero or negative and the loop never executes.
      if (check_error_condition(width)) {
	  return;
      }
 
#pragma loop count min(4) 
#pragma parallel
      for (int k=0; k < temp1; k++) {
 
         lat1 = nodes[k]->latitude1;
	        lat2 = nodes[k]->latitude2;
 
	        long1 = nodes[k]->longitude1;
	        long2 = nodes[k]->longitude2;
 
	        // Compute the distance between the two cities.
	        // `temp` has the form of the spherical law of cosines (cosine of
	        // the angle between the two points); coordinates are passed
	        // straight to sin()/cos(), so they are expected in radians.
	        temp = sin(lat1) * sin(lat2) + cos(lat1) * cos(lat2) * 
	                                              cos(long1-long2);
	        // Convert that cosine to an angle via the half-angle tangent
	        // form and scale by R.
	        result = 2.0 * R * atan(sqrt((1.0-temp)/(1.0+temp)));
 
	        // pat2 accumulates distances[0..width-1]; row k of matrix is
	        // filled with distances[k] + column index.
	        // NOTE(review): this reads distances[j] for all j < width while
	        // other iterations may write distances[k] below, so the values
	        // seen can depend on iteration order -- confirm this is
	        // acceptable before relying on parallel execution.
	        pat2 = 0;
	        for(j=0; j<width; j++) {
	          pat2 += distances[j];
	          matrix[k][j] = distances[k]+j;
	        }
	        // Overwrite distances[k] with result + pat2, but only when the
	        // newly computed distance exceeds the stored value.
	        if (result > distances[k]) {
	            distances[k] = result + pat2;
	        }
      } 
}

Verify

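Confirm that the function call does not rely on any computation inside the loop and that restructuring the code as suggested above retains the original program semantics. Note that the original code calls the function once per iteration (and not at all if the loop executes zero times), whereas the modified code calls it exactly once before the loop.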