Changeset 878
- Timestamp:
- Aug 22, 2018, 3:24:22 PM (6 years ago)
- Location:
- branches/prototype-v0/zoo-project/zoo-services/utils/hpc
- Files:
-
- 1 added
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/prototype-v0/zoo-project/zoo-services/utils/hpc/service.c
r862 r878 82 82 return SERVICE_FAILED; 83 83 } 84 unlink(flenv); 85 free(flenv); 86 84 87 85 SSHCON *test=ssh_connect(conf); 88 if(test==NULL){89 setMapInMaps(conf,"lenv","message",_("Unable to connect using throughssh."));90 return SERVICE_FAILED; 91 }86 /*if(test==NULL){ 87 setMapInMaps(conf,"lenv","message",_("Unable to connect using ssh.")); 88 return SERVICE_FAILED; 89 }*/ 92 90 93 91 char *logPath=(char*)malloc((strlen(tmpPath->value)+strlen(jobid->value)+12)*sizeof(char)); … … 201 199 close(fd); 202 200 setOutputValue(outputs,"Result",(char*)"\"FinalizeHPC run successfully\"",32); 201 unlink(flenv); 202 free(flenv); 203 203 204 204 return SERVICE_SUCCEEDED; 205 205 } 206 206 207 208 /** 209 * FinalizeHPC1 ZOO Service : 210 * This service is used to inform a ZOO-Kernel waiting for the end of the 211 * execution of a HPC service 212 * 213 * format="AllocCPUS"; for i in $(sacct -e) ; do format="$format,$i"; done; format="$(echo $format | sed "s:AllocCPUS,::")" ; echo $format; sacct --format=$format -p | grep "997f-11e8-9f78-0050569320d2" 214 * 215 * AllocCPUS,AllocGRES,AllocNodes,AllocTRES,Account,AssocID,AveCPU,AveCPUFreq,AveDiskRead,AveDiskWrite,AvePages,AveRSS,AveVMSize,BlockID,Cluster,Comment,ConsumedEnergy,ConsumedEnergyRaw,CPUTime,CPUTimeRAW,DerivedExitCode,Elapsed,Eligible,End,ExitCode,GID,Group,JobID,JobIDRaw,JobName,Layout,MaxDiskRead,MaxDiskReadNode,MaxDiskReadTask,MaxDiskWrite,MaxDiskWriteNode,MaxDiskWriteTask,MaxPages,MaxPagesNode,MaxPagesTask,MaxRSS,MaxRSSNode,MaxRSSTask,MaxVMSize,MaxVMSizeNode,MaxVMSizeTask,MinCPU,MinCPUNode,MinCPUTask,NCPUS,NNodes,NodeList,NTasks,Priority,Partition,QOS,QOSRAW,ReqCPUFreq,ReqCPUFreqMin,ReqCPUFreqMax,ReqCPUFreqGov,ReqCPUS,ReqGRES,ReqMem,ReqNodes,ReqTRES,Reservation,ReservationId,Reserved,ResvCPU,ResvCPURAW,Start,State,Submit,Suspended,SystemCPU,Timelimit,TotalCPU,UID,User,UserCPU,WCKey,WCKeyID 216 * 28||1|cpu=28,node=1|geosud|258|||||||||cluster||||00:00:56|56|0:0|00:00:02|2018-08-06T15:48:13|2018-08-06T15:48:16|0:0|1019|geosud|883299|883299|ZOO-Project_5bd1c32b-997f-11e8-9f78-0050569320d2_GSDBandMath_6_2_005||||||||||||||||||||28|1|muse044||4294360886|defq|qos_geosud|20|Unknown|Unknown|Unknown|Unknown|1||0n|1|cpu=1,node=1|||00:00:01|00:00:01|1|2018-08-06T15:48:14|COMPLETED|2018-08-06T15:48:13|00:00:00||UNLIMITED|00:00:00|1229|geosudwps|||0| 217 * 218 */ 219 ZOO_DLL_EXPORT int FinalizeHPC1(maps*& conf,maps*& inputs,maps*& outputs){ 220 // Retrieve the jobid corresponding to the identifier generated by SLURM 221 // by reading the file generated when running the SBATCH file 222 map* jobid=getMapFromMaps(inputs,"jobid","value"); 223 struct sockaddr_un addr; 224 char buf[100]="3"; 225 int fd,rc=NULL; 226 int i=0; 227 map* usid=getMapFromMaps(conf,"lenv","usid"); 228 map* tmpPath=getMapFromMaps(conf,"main","tmpPath"); 229 230 char *flenv = 231 (char *) malloc ((strlen (tmpPath->value) + 232 strlen (jobid->value) + 12) * sizeof (char)); 233 sprintf (flenv, "%s/%s_lenv.cfg", tmpPath->value, jobid->value); 234 maps* m = (maps *) malloc (MAPS_SIZE); 235 m->child=NULL; 236 m->next=NULL; 237 map* configId=NULL; 238 239 240 if(conf_read(flenv, m) != 2){ 241 configId=getMapFromMaps(m,"lenv","configId"); 242 setMapInMaps(conf,"lenv","configId",configId->value); 243 }else{ 244 setMapInMaps(conf,"lenv","message",_("Unable to read the lenv section file of the requested jobid")); 245 return SERVICE_FAILED; 246 } 247 248 SSHCON *test=ssh_connect(conf); 249 /*if(test==NULL){ 250 setMapInMaps(conf,"lenv","message",_("Unable to connect using ssh.")); 251 return SERVICE_FAILED; 252 }*/ 253 254 char *logPath=(char*)malloc((strlen(tmpPath->value)+strlen(jobid->value)+12)*sizeof(char)); 255 sprintf(logPath,"%s/exec_out_%s",tmpPath->value,jobid->value); 256 struct stat f_status; 257 int ts=stat(logPath, &f_status); 258 char* fcontent = NULL; 259 if(ts==0) { 260 fcontent=(char*)malloc(sizeof(char)*(f_status.st_size+1)); 261 FILE* f=fopen(logPath,"rb"); 262 fread(fcontent,f_status.st_size,1,f); 263 int fsize=f_status.st_size; 264 fcontent[fsize]=0; 265 fclose(f); 266 }else{ 267 setMapInMaps(conf,"lenv","message",_("No service with this jobid can be found")); 268 return SERVICE_FAILED; 269 } 270 free(logPath); 271 // Run scontrol to check if the service execution ended. 272 // Store all the informations returned by scontrol command as a cfg file to 273 // be parsed back by the ZOO-Kernel waiting for the execution of the remote 274 // service 275 maps* tmpMaps=createMaps("henv"); 276 277 map* tmpMap=getMapFromMaps(conf,configId->value,"remote_command_opt"); 278 char* command=(char*)malloc((126+strlen(tmpMap->value))*sizeof(char)); 279 sprintf(command,"sacct --format=%s -p | grep \"%s\" | sed \"s:||:|None|:g;s:||:|:g\"",tmpMap->value,jobid->value); 280 if(ssh_exec(conf,command,ssh_get_cnt(conf))==0){ 281 free(command); 282 setMapInMaps(conf,"lenv","message",_("Failed to run scontrol remotely")); 283 // TODO: check status in db and if available continue in other case return SERVICE_FAILED 284 return SERVICE_FAILED; 285 }else{ 286 free(command); 287 logPath=(char*)malloc((strlen(tmpPath->value)+strlen(usid->value)+11)*sizeof(char)); 288 sprintf(logPath,"%s/exec_out_%s",tmpPath->value,usid->value); 289 int ts=stat(logPath, &f_status); 290 if(ts==0) { 291 fcontent=(char*)malloc(sizeof(char)*(f_status.st_size+1)); 292 FILE* f=fopen(logPath,"rb"); 293 fread(fcontent,f_status.st_size,1,f); 294 int fsize=f_status.st_size; 295 fcontent[fsize]=0; 296 fclose(f); 297 free(logPath); 298 char *token, *saveptr; 299 char *token1, *saveptr1; 300 token = strtok_r (tmpMap->value, ",", &saveptr); 301 token1 = strtok_r (fcontent, "|", &saveptr1); 302 while (token != NULL) { 303 fprintf(stderr,"%s %d %s \n",__FILE__,__LINE__,token); 304 fflush(stderr); 305 fprintf(stderr,"%s %d %s %s \n",__FILE__,__LINE__,token,token1); 306 fflush(stderr); 307 if(token1 != NULL){ 308 if(tmpMaps->content==NULL) 309 tmpMaps->content=createMap(token,token1); 310 else 311 addToMap(tmpMaps->content,token,token1); 312 } 313 token = strtok_r (NULL, ",", &saveptr); 314 token1 = strtok_r (NULL, "|", &saveptr1); 315 } 316 }else{ 317 setMapInMaps(conf,"lenv","message",_("Unable to access the downloaded execution log file")); 318 return SERVICE_FAILED; 319 } 320 } 321 logPath=(char*)malloc((strlen(tmpPath->value)+strlen(jobid->value)+15)*sizeof(char)); 322 sprintf(logPath,"%s/exec_status_%s",tmpPath->value,jobid->value); 323 dumpMapsToFile(tmpMaps,logPath,0); 324 char *sname=(char*)malloc((strlen(tmpPath->value)+strlen(jobid->value)+21)); 325 sprintf(sname,"%s/.wait_socket_%s.sock",tmpPath->value,jobid->value); 326 if ( (fd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) { 327 perror("socket error"); 328 setMapInMaps(conf,"lenv","message",_("Socket error")); 329 return SERVICE_FAILED; 330 } 331 memset(&addr, 0, sizeof(addr)); 332 addr.sun_family = AF_UNIX; 333 strncpy(addr.sun_path, sname, sizeof(addr.sun_path)-1); 334 if (connect(fd, (struct sockaddr*)&addr, sizeof(addr)) == -1) { 335 perror("connect error"); 336 setMapInMaps(conf,"lenv","message",_("Unable to connect")); 337 return SERVICE_FAILED; 338 } 339 if (write(fd, "3", 1) != rc) { 340 if (rc < 0) { 341 perror("write error"); 342 setMapInMaps(conf,"lenv","message",_("Unable to announce the successful execution of the HPC service")); 343 close(fd); 344 return SERVICE_FAILED; 345 } 346 } 347 close(fd); 348 unlink(flenv); 349 free(flenv); 350 setOutputValue(outputs,"Result",(char*)"\"FinalizeHPC run successfully\"",32); 351 352 return SERVICE_SUCCEEDED; 353 } 354 207 355 }
Note: See TracChangeset
for help on using the changeset viewer.