/*
 * Copyright (C) 1998 by CERN/IT/PDP/DM
 * All rights reserved
 */

#ifndef lint
static char sccsid[] = "@(#)hpss_common.c	1.6 07/06/99  CERN IT-PDP/DM Olof Barring";
#endif /* not lint */

/* hpss_common.c       Remote File I/O - read/write an HPSS file        */


#include <syslog.h>             /* system logger 			*/
#if !defined(linux)
extern char *sys_errlist[];     /* system error list                    */
#endif

/*
 * System remote file I/O definitions
 */

#define RFIO_KERNEL     1 
#include "rfio.h"  

/*
 *  - Remote File I/O client interface to HPSS.
 *  Content:
 *    rfio_setcos()
 *    rfio_readlist()
 *    rfio_writelist()
 *    rfio_commonlist()
 *    rfio_setup_ports()      (static)
 *    rfio_setbuffs()         (static)
 *    rfio_setsockopts()      (static)
 *    rfio_setup_shm()        (static)
 *    rfio_check_chld()       (static)
 *    rfio_common_cleanup()   (static)
 *    rfio_setup_handlers()   (static)
 *    rfio_transfer_data()    (static)
 *    rfio_getdatasock()      (static)
 *  Description:
 *    - rfio_setcos() permits change of Class of Service on an open remote
 *      file descriptor. The call will result in a hpss_SetCOSByHints() on the
 *      server side for which the rule is that the filedescriptor must be
 *      associated with a new file (can also be an old file opened with O_TRUNC)
 *      to which no data have been written. The rfio_setcos() provides the
 *      possibility to provide storage hints either as an explicit COS ID or
 *      by expected file size.
 *
 *    - rfio_readlist() and rfio_writelist() both map to rfio_commonlist()
 *      which provides a RFIO client interface to the hpss_Readlist() and
 *      hpss_Writelist() calls. A process environment is set up with signal
 *      handlers, data send/receive sockets and shared memory. The usage should
 *      be to call these routines for large sequential data transfers. The
 *      call will return only when either the full data transfer has finished
 *      successfully or an error has occurred. The actual data transfers take
 *      place in child processes. In the child processes the control is given
 *      to the application for each data transfer (with the size given by the
 *      buflen parameter) via a call to the "worker()" routine which should be
 *      provided by the application and declared:
 *      int (*worker)(int fd, char *buf, int buflen, int offset);
 *      where the first parameter is the remote file descriptor, the second
 *      is the data buffer, the third is the buffer length and the fourth is
 *      the offset in the file. In addition to being called for each data
 *      transfer the worker() routine is called with:
 *      + fd = -1 before the first transfer in this child
 *      + buflen<0 || offset<0 after the last transfer in this child
 *      + buf = NULL if a gap (of size buflen) has been detected in remote file
 *      Note that data can arrive in any order.
 *      An example of the usage of these routines is given in rfcp.c
 *
 */
#if defined(HPSS)
/*
 * Forward declarations. The empty parameter lists are deliberate: they are
 * compatible with the old-style (K&R) function definitions used in this file.
 */
int rfio_setcos();
int rfio_readlist();
int rfio_writelist();
int rfio_commonlist();
static int rfio_setup_ports();
static int rfio_setbuffs();
/*
 * rfio_setsockopts() is called before its (static) definition. Without this
 * declaration the first call implicitly declares it as an external function,
 * which then conflicts with the static definition.
 */
static int rfio_setsockopts();
static int rfio_setup_shm();
static int rfio_check_chld();
static int rfio_common_cleanup();
static int rfio_setup_handlers();
static int rfio_transfer_data();
static int rfio_getdatasock();

/* Direction of a list transfer: selects hpss_Readlist or hpss_Writelist. */
enum readwrite {readlist,writelist};

/* Pid of the process that entered rfio_commonlist() (the "parent"). */
static int parent_pid;

/*
 * Per-connection bookkeeping kept in a shared memory segment so that the
 * parent and its forked children see the same values. The array is indexed
 * by socket descriptor. One extra element at index nb_counters belongs to
 * the parent: its pid, the total requested size and a status flag that the
 * parent sets to non-zero to tell the children to stop.
 */
static struct counter {
  pid_t pid;        /* pid of the child serving this connection (0 = none) */
  time_t t_start;   /* time of the first received initiator message        */
  time_t t_end;     /* time at which the child left its transfer loop      */
  int ns;           /* accepted socket descriptor for this connection      */
  int size;         /* number of bytes accounted to this slot              */
  int status;       /* non-zero once an error (or stop request) is set     */
} *counters;
static int nb_counters = 0;     /* number of per-connection slots (0 = none)  */
static int shmid = -1;          /* shared memory id for counters, -1 if unset */
/*
 * "readlist" or "writelist", used as prefix in trace and syslog messages.
 * The size is exactly strlen("writelist") + 1.
 */
char function[10] = "";

/*
 * rfio_setcos() - send a RQST_SETCOS request on the open RFIO connection s.
 *
 *   s        : socket of the open remote file
 *   filesize : marshalled as first LONG of the request
 *   cosid    : marshalled as second LONG of the request
 *
 * Returns -1 if the request could not be written or the reply could not be
 * read. Otherwise returns the status found in the reply; when that status
 * is negative rfio_errno is set to the reply's rcode (and serrno to
 * SENORCODE if the rcode is 0).
 */
int rfio_setcos(s,filesize,cosid)
int s,filesize,cosid;
{
  int reply_type,reply_status,reply_rcode,reply_msgsiz;
  char *cursor;

  TRACE(2,"rfio","rfio_setcos(%d, %d, %d)\n",s,filesize,cosid);

  /* Build the fixed size request header */
  cursor = rfio_buf;
  marshall_WORD(cursor, RFIO_MAGIC);
  marshall_WORD(cursor, RQST_SETCOS);
  marshall_LONG(cursor, filesize);
  marshall_LONG(cursor, cosid);

  TRACE(2,"rfio","rfio_setcos: writing %d bytes",RQSTSIZE) ;
  if ( netwrite(s,rfio_buf,RQSTSIZE) != RQSTSIZE ) {
    TRACE(2,"rfio","rfio_setcos: write(): ERROR occured (errno=%d)",errno) ;
    END_TRACE() ;
    return -1 ;
  }

  if ( netread(s,rfio_buf,RQSTSIZE) != RQSTSIZE ) {
    TRACE(2,"rfio","rfio_setcos: read(): ERROR occured (errno=%d)", errno);
    END_TRACE();
    return -1;
  }

  /* Decode the reply: request type, status, return code, message size */
  cursor = rfio_buf;
  unmarshall_WORD(cursor, reply_type);
  unmarshall_LONG(cursor, reply_status);
  unmarshall_LONG(cursor, reply_rcode);
  unmarshall_LONG(cursor, reply_msgsiz);

  if ( reply_status >= 0 ) return(reply_status);

  /* Server reported a failure: propagate its error code */
  rfio_errno = reply_rcode;
  if ( reply_rcode == 0 ) {
    serrno = SENORCODE;
  }
  return(reply_status);
}

int rfio_readlist(s,offset,size,nb_ports,worker,buffer,buflen)
int s,offset,size,nb_ports,buflen;
char *buffer;
/* 
 * worker() is a user supplied routine which takes four input arguments:
 * int     fd: remote file descriptor
 * char  *buf: data buffer
 * int buflen: length of databuffer
 * int offset: offset in file for this datasegment
 */
int (*worker)(int, char *, int, int);
{
  enum readwrite rw;
  rw = readlist;
  return(rfio_commonlist(s,offset,size,nb_ports,worker,buffer,buflen,rw));
}

int rfio_writelist(s,offset,size,nb_ports,worker,buffer,buflen)
int s,offset,size,nb_ports,buflen;
char *buffer;
/* 
 * worker() is a user supplied routine which takes four input arguments:
 * int     fd: remote file descriptor
 * char  *buf: data buffer
 * int buflen: length of databuffer
 * int offset: offset in file for this datasegment
 */
int (*worker)(int, char *, int, int);
{
  enum readwrite rw;
  rw = writelist;
  return(rfio_commonlist(s,offset,size,nb_ports,worker,buffer,buflen,rw));
}

/*
 * rfio_commonlist() - common engine behind rfio_readlist()/rfio_writelist().
 *
 *   s        : control socket of the open remote file
 *   offset   : file offset, marshalled into the request
 *   size     : number of bytes to transfer, marshalled into the request and
 *              stored as the expected total in the parent's counter slot
 *   nb_ports : number of listening ports to open (0..RFIO_MAX_PORTS)
 *   worker   : user supplied routine taking
 *                int     fd: remote file descriptor
 *                char  *buf: data buffer
 *                int buflen: length of data buffer
 *                int offset: offset in file for this data segment
 *   buffer   : data buffer handed to the children
 *   buflen   : length of buffer, marshalled into the request
 *   rw       : readlist or writelist
 *
 * The routine opens nb_ports listening sockets, sets up the shared counters
 * and the signal handlers, sends the request on s and then loops in select()
 * on s and the listening sockets. Every accepted connection is served by a
 * forked child running rfio_transfer_data(). A message on s is either a gap
 * notification (reported to worker() with buf == NULL) or the end of the
 * transfer.
 *
 * Returns -1 (errno set) on invalid arguments, otherwise the value of
 * rfio_common_cleanup().
 *
 * Note: counters may be set to NULL asynchronously by the signal handler
 * rfio_common_cleanup(), hence the repeated "counters == NULL" checks.
 */
int rfio_commonlist(s,offset,size,nb_ports,worker,buffer,buflen,rw)
int s,offset,size,nb_ports,buflen;
enum readwrite rw;
char *buffer;
int (*worker)(int, char *, int, int);
{
  int status ;
  int  rcode ;
  WORD   req ;
  char   * p ;          /* Pointer to buffer                    */
  int reqid;
  int how = -1;
  time_t t;
  pid_t pid;
  int len,i;
  int hsize;
  int gap, gapsize, gapoffset;
  int total_bytes = 0;
  int total_status = 0;
  int ports[RFIO_MAX_PORTS];
  int socks[RFIO_MAX_PORTS];

  if ( rw == readlist ) (void) strcpy(function,"readlist");
  else (void) strcpy(function,"writelist");
  TRACE(1,"rfio","rfio_%s(%d,%d,%d,%d) entered",function,s,offset,size,nb_ports) ;
  if ( worker == NULL ) {
    TRACE(2, "rfio" ,"rfio_%s: no data receiving routine provided",function);
    errno= EINVAL ;
    END_TRACE();
    return -1;
  }
  /*
   * s is used both with FD_SET() and as an index into counters[], which
   * only has FD_SETSIZE per-connection slots.
   */
  if ( s < 0 || s >= FD_SETSIZE ) {
    TRACE(2, "rfio" ,"rfio_%s: control socket %d out of range",function,s);
    errno= EBADF ;
    END_TRACE();
    return -1;
  }
  time(&t);
  pid = getpid();
  parent_pid = pid;
  /* Unsigned arithmetic: shifting a signed time_t could overflow */
  reqid = (int)(((unsigned long)pid | ((unsigned long)t << 16)) & 0x7FFFFFFFUL);
  TRACE(2,"rfio","rfio_%s: HPSS request ID is %d",function,reqid);

  if ( nb_ports < 0 || nb_ports > RFIO_MAX_PORTS ) {
    TRACE(2, "rfio" ,"rfio_%s: too may ports (%d) requested. Max is %d",function,nb_ports,
          RFIO_MAX_PORTS);
    errno= EINVAL ;
    END_TRACE();
    return -1;
  }
  status = rfio_setup_ports(nb_ports,socks,ports,rw);
  if ( status ) {
    TRACE(2, "rfio" ,"rfio_%s: rfio_setup_ports failed with status %d",function,status);
    /*
     * After a failed setup some entries were never opened (rfio_setup_ports
     * zero-fills the array first) or hold -1 from a failed socket() call.
     * Only close what looks like an open descriptor, never fd 0.
     */
    for (i=0; i<nb_ports; i++) if ( socks[i] > 0 ) close(socks[i]);
    return(rfio_common_cleanup(-1));
  }
  status = rfio_setup_shm();
  if ( status ) {
    TRACE(2, "rfio" ,"rfio_%s: rfio_setup_shm failed with status %d",function,status);
    for (i=0; i<nb_ports; i++) close(socks[i]);
    return(rfio_common_cleanup(-1));
  }
  status = rfio_setup_handlers();
  if ( status ) {
    TRACE(2, "rfio" ,"rfio_%s: rfio_setup_handlers failed with status %d",function,status);
    for (i=0; i<nb_ports; i++) close(socks[i]);
    return(rfio_common_cleanup(-1));
  }
  /*
   * Request: fixed header followed by six LONGs (size, how, offset, reqid,
   * buflen, nb_ports) and one LONG per listening port.
   */
  p= rfio_buf ;
  len = 6*LONGSIZE + nb_ports*LONGSIZE;
  marshall_WORD(p, RFIO_MAGIC);
  if ( rw == readlist ) {
    marshall_WORD(p, RQST_READLIST) ;
  } else {
    marshall_WORD(p,RQST_WRITELIST);
  }
  marshall_LONG(p,len) ;
  p = rfio_buf + RQSTSIZE;
  marshall_LONG(p,size) ;
  marshall_LONG(p,how) ;
  marshall_LONG(p,offset) ;
  marshall_LONG(p,reqid);
  marshall_LONG(p,buflen);
  marshall_LONG(p,nb_ports) ;
  for (i=0; i<nb_ports; i++) marshall_LONG(p,ports[i]);

  TRACE(2,"rfio","rfio_%s: writing %d bytes",function,RQSTSIZE+len) ;
  if (netwrite(s,rfio_buf,RQSTSIZE+len) != RQSTSIZE+len)  {
    TRACE(2,"rfio","rfio_%s: write(): ERROR occured (errno=%d)", function,errno) ;
    for (i=0; i<nb_ports; i++) close(socks[i]);
    return(rfio_common_cleanup(-1));
  }
  /* Expected total; compared with the per-connection sums at cleanup */
  counters[nb_counters].size = size;
  /*
   * Accept loop to break out of
   */
  for (;;) {
    fd_set read_set,write_set,excpt_set;
    struct timeval timeout;
    int nfd,ns;
    int maxfd = s;
    struct sockaddr_in addr;
    int addrlen;

    if ( counters == NULL ) break;
    FD_ZERO(&read_set);
    FD_ZERO(&write_set);
    FD_ZERO(&excpt_set);
    FD_SET(s,&read_set);
    timeout.tv_usec = 0;
    timeout.tv_sec = 30*60;      /* timeout for DEC HIPPI problems */
    for (i=0; i<nb_ports; i++) {
      FD_SET(socks[i],&read_set);
      if ( socks[i] > maxfd ) maxfd = socks[i];
    }
    maxfd++;
    total_bytes = 0;
    total_status = 0;
    for (i=0; i<nb_counters; i++) total_bytes += counters[i].size;
    /* "<=" on purpose: the parent's own slot sits at index nb_counters */
    for (i=0; i<=nb_counters; i++) total_status += counters[i].status;
    if ( total_status ) {
      TRACE(2,"rfio","rfio_%s: total_status = %d",function,total_status);
      break;
    }
    TRACE(2,"rfio","rfio_%s: %d bytes successfully transfered",function,total_bytes);
    if ( (nfd = select(maxfd,&read_set,&write_set,&excpt_set,&timeout)) > 0 ) {
      while (nfd--) {
        if ( counters == NULL ) goto end_of_transfer;
        if ( FD_ISSET(s,&read_set) ) {
          /* Message from the server on the control socket */
          FD_CLR(s,&read_set);
          hsize = WORDSIZE + 5*LONGSIZE;
          if ( netread(s,rfio_buf,hsize) != hsize ) {
            total_status = errno;
            TRACE(2,"rfio","rfio_%s: read(): ERROR occured (errno=%d)",function, errno);
            goto end_of_transfer;
          }
          if ( counters == NULL ) goto end_of_transfer;
          p= rfio_buf ;
          unmarshall_WORD(p,req) ;      /* RQST_READLIST or RQST_WRITELIST */
          unmarshall_LONG(p,status) ;
          unmarshall_LONG(p, rcode) ;
          unmarshall_LONG(p,gap) ;
          unmarshall_LONG(p,gapoffset) ;
          unmarshall_LONG(p,gapsize) ;
          if ( !gap ) {
            /*
             * hpss_Readlist has returned due end of transfer or an error.
             */
            total_status = status;
            if ( status < 0 ) {
              rfio_errno= rcode ;
              if ( rcode == 0 )
                serrno = SENORCODE ;
            }
            goto end_of_transfer;
          } else {
            /*
             * This is a gap: make sure we count the gapsize
             */
            TRACE(1,"rfio","rfio_%s: gap of size %d at offset %d",function,gapsize,gapoffset);
            counters[s].size += gapsize;
            rcode = worker(s,NULL,gapsize,gapoffset);
            if ( rcode != gapsize ) {
              counters[s].status = EIO;
              goto end_of_transfer;
            }
            if ( counters == NULL ) goto end_of_transfer;
            continue;
          }
        } else {
          /*
           * Find the listening socket that is ready. The index is tested
           * before socks[i] is read so the scan never runs past the array,
           * and the descriptor is only cleared once it is known to be valid.
           */
          for (i=0; i<nb_ports && !FD_ISSET(socks[i],&read_set); i++);
          if ( i >= nb_ports ) {
            TRACE(2,"rfio","rfio_%s: selected socket out of range",function);
            continue;
          }
          FD_CLR(socks[i],&read_set);
          addrlen = sizeof(addr);
          ns = accept(socks[i],(struct sockaddr *)&addr,(int *)&addrlen);
          if ( counters == NULL ) goto end_of_transfer;
          if ( ns < 0 ) {
            if ( errno != EINTR ) {
              serrno = errno;
              TRACE(2,"rfio","rfio_%s: accept(): %s",function,sys_errlist[errno]);
              /* listening sockets are closed once, at end_of_transfer */
              total_status = -1;
              goto end_of_transfer;
            }
            continue;
          }
          if ( ns >= nb_counters ) {
            /* ns is the index into counters[]; refuse what does not fit */
            TRACE(2,"rfio","rfio_%s: accepted socket %d exceeds counter table (%d)",
                  function,ns,nb_counters);
            close(ns);
            serrno = EMFILE;
            total_status = -1;
            goto end_of_transfer;
          }
          switch (fork()) {
          case -1 :
            counters[ns].status = errno;
            TRACE(2,"rfio","rfio_%s: fork(): ERROR occured (errno=%d)",function, errno) ;
            close(ns);
            for (i=0; i<nb_ports; i++) close(socks[i]);
            return(rfio_common_cleanup(-1));
            break;
          case 0:    /* Child */
            for (i=0; i<nb_ports; i++) close(socks[i]);
            counters[ns].ns = ns;
            (void) rfio_setsockopts(ns);
            status = rfio_transfer_data(s,ns,reqid,worker,buffer,buflen,rw);
            exit(0);
            break;
          default :
            /*
             * Parent. Don't close(ns) because ns is used as index in counters.
             * Instead we close it when cleaning up after a child exit.
             */
            break;
          }
        }
      }
    } else {
      if ( nfd == 0 ) {
        serrno = SETIMEDOUT;
        (void) syslog(LOG_ALERT,"rfio_%s: select(): timeout for request id %d",function,reqid);
        total_status = -1;
        goto end_of_transfer;
      }
      if ( errno != EINTR ) {
        serrno = errno;
        TRACE(2,"rfio","rfio_%s: select(): %s",function,sys_errlist[errno]);
        total_status = -1;
        goto end_of_transfer;
      }
      if ( counters == NULL ) goto end_of_transfer;
    }
  }
end_of_transfer:
  for (i=0; i<nb_ports; i++) close(socks[i]);
  return(rfio_common_cleanup(total_status));
}

/*
 * NW_ERROR(A) - failure exit for rfio_setup_ports() only (it uses that
 * function's nb and socks). Logs the failing call together with the errno
 * text, closes every socket opened so far, marks all entries as -1 so that
 * a caller cannot close them a second time (or close descriptor 0 for
 * entries that were never opened), restores errno and returns -1.
 */
#define NW_ERROR(A) \
    { \
      int nw_i; int nw_errno = errno; \
      syslog(LOG_ALERT, "rfio: %s: %s", #A,sys_errlist[nw_errno]); \
      TRACE(2, "rfio","rfio_setup_ports: (line %d) %s: ERROR occured (errno=%d)",__LINE__,#A,nw_errno) ; \
      for (nw_i=0; nw_i<nb; nw_i++) { \
        if ( socks[nw_i] > 0 ) close(socks[nw_i]); \
        socks[nw_i] = -1; \
      } \
      errno = nw_errno; \
      return(-1); \
    }

/*
 * rfio_setup_ports() - open nb listening TCP sockets on system chosen ports.
 *
 *   nb    : number of sockets to open
 *   socks : out, the listening socket descriptors
 *   ports : out, the port numbers (host byte order) the sockets are bound to
 *   rw    : transfer direction, passed on to rfio_setbuffs()
 *
 * Returns 0 on success. Returns -1 if nb is negative or if any socket call
 * fails; in the latter case all sockets opened so far have been closed.
 */
static int rfio_setup_ports(nb,socks,ports,rw)
int nb;
enum readwrite rw;
int socks[],ports[];
{
  struct sockaddr_in sin;
  int i,len;

  TRACE(1,"rfio","rfio_setup_ports(%d) entered",nb) ;
  /* A negative count would turn into a huge size in the memset() calls */
  if ( nb < 0 ) return(-1);
  (void) memset(socks,'\0',nb*sizeof(int));
  (void) memset(ports,'\0',nb*sizeof(int));
  for (i=0; i<nb; i++) {
    (void) memset(&sin,'\0',sizeof(struct sockaddr_in));
    sin.sin_family = AF_INET;
    sin.sin_addr.s_addr = htonl(INADDR_ANY);

    if ( (socks[i] = socket(AF_INET,SOCK_STREAM,0)) == -1 ) NW_ERROR(socket());

    /* Port 0: let the system pick a free port, read it back below */
    sin.sin_port = 0;

    if ( bind(socks[i],(struct sockaddr *)&sin,sizeof(struct sockaddr_in)) == -1 ) NW_ERROR(bind());
    len = sizeof(sin);
    if ( getsockname(socks[i],(struct sockaddr *)&sin,&len) == -1 ) NW_ERROR(getsockname());
    ports[i] = ntohs(sin.sin_port);

    if (listen(socks[i], SOMAXCONN) == -1) NW_ERROR(listen());

    (void) rfio_setbuffs(socks[i],rw);
    (void) rfio_setsockopts(socks[i]);
  }
  return(0);
}

/*
 * rfio_setbuffs() - try to give socket s a large kernel buffer.
 *
 *   s  : socket descriptor
 *   rw : readlist sets the receive buffer (SO_RCVBUF),
 *        writelist sets the send buffer (SO_SNDBUF)
 *
 * Starting at 256 kB the size is halved for as long as setsockopt() fails
 * with ENOBUFS, down to 16 kB. Any other failure is traced and ends the
 * attempt. This is best effort: the function always returns 0.
 *
 * Every TRACE() below sits in its own braces. How TRACE expands is not
 * visible in this file; if it expands to an "if" statement, an unbraced
 * "if (...) TRACE(...); else TRACE(...);" would attach the else to the
 * wrong if.
 */
static int rfio_setbuffs(s,rw)
int s;
enum readwrite rw;
{
  int maxbuf,j,rc;

  for (j = 256 * 1024 ; j >= 16 * 1024 ; j >>= 1) {
    maxbuf = j;
    rc = 0;
    if ( rw == readlist ) {
      rc = setsockopt(s,SOL_SOCKET,SO_RCVBUF,(char *)&maxbuf,sizeof(maxbuf));
    } else if ( rw == writelist ) {
      rc = setsockopt(s,SOL_SOCKET,SO_SNDBUF,(char *)&maxbuf,sizeof(maxbuf));
    }
    if ( rc < 0 ) {
      if ( errno == ENOBUFS ) continue;     /* too big: retry with half */
      if ( rw == readlist ) {
        TRACE(2,"rfio","rfio_setbuffs(): setsockopt(%d,SO_RCVBUF): %s\n",
              s,sys_errlist[errno]);
      } else {
        TRACE(2,"rfio","rfio_setbuffs(): setsockopt(%d,SO_SNDBUF): %s\n",
              s,sys_errlist[errno]);
      }
      /* Hard failure: nothing was set, so do not report a buffer size */
      break;
    }
    if ( rw == readlist ) {
      TRACE(2,"rfio","rfio_setbuffs(): socket %d: receive buffer %d\n",
            s,maxbuf);
    } else {
      TRACE(2,"rfio","rfio_setbuffs(): socket %d: send buffer %d\n",
            s,maxbuf);
    }
    break;
  }
  return(0);
}
/*
 * rfio_setsockopts() - switch on SO_KEEPALIVE for socket s.
 *
 * A failing setsockopt() is only traced; the function always returns 0.
 */
static int rfio_setsockopts(int s) {
  int keepalive = 1;
  int rc;

  rc = setsockopt(s,SOL_SOCKET,SO_KEEPALIVE,&keepalive,sizeof(keepalive));
  if ( rc == -1 ) {
    TRACE(2,"rfio","rfio_setsockopts(): setsockopt(%d,SO_KEEPALIVE): %s\n",
        s,sys_errlist[errno]);
  }
  return(0);
}

#include <sys/ipc.h>
#include <sys/shm.h>
/*
 * SHM_ERROR(A) - failure exit for rfio_setup_shm(): trace the failing call,
 * detach and remove whatever part of the segment exists, put the file-scope
 * state (counters, shmid, nb_counters) back to "no shared memory" and
 * return -1.
 */
#define SHM_ERROR(A) \
    { \
      TRACE(2, "rfio","rfio_setup_shm: %s: ERROR occured (errno=%d)",#A,errno) ; \
      if ( counters != NULL ) (void) shmdt((void *)counters); \
      if ( shmid>=0 ) (void) shmctl(shmid,IPC_RMID,0); \
      counters = NULL; \
      shmid = -1; \
      nb_counters = 0; \
      return(-1); \
    }

/*
 * rfio_setup_shm() - create and attach the shared memory segment holding
 * the counters: FD_SETSIZE per-connection slots plus one extra slot (index
 * nb_counters) in which the parent pid is stored.
 *
 * Returns 0 on success or if a segment is already attached, -1 on failure.
 */
static int rfio_setup_shm()
{
  TRACE(1,"rfio","rfio_setup_shm entered") ;
  /*
   * nb_counters alone is not enough: rfio_common_cleanup() detaches the
   * segment and sets counters to NULL without resetting nb_counters, so a
   * second transfer in the same process must set the segment up again.
   */
  if ( nb_counters && counters != NULL ) {
    TRACE(1,"rfio","rfio_setup_shm: shm already declared") ;
    return 0;
  }
  counters = NULL;
  shmid = -1;
  nb_counters = FD_SETSIZE;
  TRACE(1,"rfio","rfio_setup_shm: setup shm with %d counters (size = %d)",nb_counters+1,
        (int)(sizeof(struct counter)*(nb_counters+1)));
  /* NOTE(review): mode 0766 leaves the segment world read/writable - confirm */
  if ( (shmid=shmget(IPC_PRIVATE,sizeof(struct counter)*(nb_counters+1),
                     IPC_CREAT | 0766)) == -1 ) SHM_ERROR(shmget());
  counters = (struct counter *) shmat(shmid,(void *)0,0);
  if ( counters == (struct counter *)-1 ) {
    /* shmat() reports failure with (void *)-1, not with NULL */
    counters = NULL;
    SHM_ERROR(shmat());
  }
  TRACE(1,"rfio","rfio_setup_shm: shmid = %d, nb_counters=%d, shm start address=0x%lx",
        shmid,nb_counters,(unsigned long)counters);
  /* Clear every slot, including the extra one used by the parent */
  (void) memset(counters,0,sizeof(struct counter)*(nb_counters+1));
  /*
   * save parent pid
   */
  counters[nb_counters].pid = parent_pid;
  return 0;
}

#include <signal.h>
#include <sys/wait.h>
/*
 * rfio_check_chld() - SIGCHLD handler (installed by rfio_setup_handlers()).
 *
 *   sig : the signal number, only used in the trace message
 *
 * Reaps every child that has exited without blocking. For each reaped child
 * that is found in the counters its pid entry is cleared and the socket
 * recorded for that connection is closed. Always returns 0.
 *
 * errno is saved on entry and restored on exit: waitpid() and close() may
 * change it, while the interrupted code may be about to inspect errno from
 * its own failed call.
 */
static int rfio_check_chld(sig)
int sig;
{
  pid_t pid,my_pid;
  int status;
  int i;
  int saved_errno = errno;

  my_pid = getpid();
  TRACE(1, "rfio","rfio_check_chld: pid %d received signal %d",my_pid,sig) ;
  while ((pid = waitpid((pid_t)-1,&status,WNOHANG))>0) {
    TRACE(1, "rfio","rfio_check_chld: pid %d waited for pid %d, status=%d",my_pid,pid,status) ;
    if ( counters != NULL ) {
      /* Look up the slot owned by this child */
      for (i= 0; i<nb_counters; i++) if (counters[i].pid == pid) break;
      if ( i<nb_counters ) {
        TRACE(1, "rfio","rfio_check_chld: pid %d close socket %d from child %d",my_pid,
              counters[i].ns,pid) ;
        counters[i].pid = 0;
        close(counters[i].ns);
      }
    }
  }
  errno = saved_errno;
  return 0;
}
/*
 * rfio_common_cleanup() - end of transfer processing. Called directly with
 * a status value and also installed as signal handler by
 * rfio_setup_handlers(), in which case sig is the signal number.
 *
 *   sig : status or signal number; 0 means "no error"
 *
 * Without shared memory it just returns -|sig|.
 *
 * In the parent: flags the end of transfer to the children, gives them up
 * to 5 seconds for the byte count to reach the requested size, sends
 * SIGUSR1 to the remaining children, waits for them, removes the shared
 * memory and toggles the signal handlers. Exits the process if sig is
 * SIGINT. Returns -|sig| if sig is non-zero, otherwise the number of bytes
 * transferred or -1 (serrno = EIO) if the transfer is incomplete.
 *
 * In a child: toggles the signal handlers and exits with sig as exit code.
 */
static int rfio_common_cleanup(sig)
int sig;
{
  pid_t pid;
  int i,total_bytes;
  int retry = 5;

  pid = getpid();
  TRACE(1, "rfio","rfio_%s_cleanup: pid %d (parent %d) signal %d",function,pid,parent_pid,sig) ;
  if ( nb_counters == 0 || counters == NULL ) return(sig<0 ? sig : -sig);
  if ( pid == parent_pid ) {
    /*
     * Parent cleanup: check size and retry if not OK.
     *   After that kill and wait out all babies. Then detach and remove shm
     */
    counters[nb_counters].status = 1;     /* tells the children to stop */
    while ( retry ) {
      total_bytes = 0;
      for (i=0;i<nb_counters; i++) total_bytes+=counters[i].size;
      if ( total_bytes != counters[nb_counters].size ) {
        retry--;
        TRACE(2,"rfio",
              "rfio_%s_cleanup: (parent) incomplete transfer (%d out of %d), re-check in 1 sec.",
              function,total_bytes,counters[nb_counters].size);
        (void) sleep(1);
      }
      else retry = 0;
    }
    for (i=0; i<nb_counters; i++) {
      if ( counters[i].pid ) {
        TRACE(2,"rfio","rfio_%s_cleanup: (parent) kill %d",function,counters[i].pid);
        (void) kill(counters[i].pid,SIGUSR1);
        counters[i].pid = 0;
      }
    }
    TRACE(2,"rfio","rfio_%s_cleanup: (parent) waiting out all childs",function);
    /*
     * The SIGCHLD handler interrupts wait() (errno == EINTR) every time a
     * child exits. Keep waiting in that case, otherwise the loop would stop
     * after the first child and leave the others unreaped.
     */
    for (;;) {
      if ( wait(0) > 0 ) continue;
      if ( errno != EINTR ) break;
    }
    total_bytes = 0;
    for (i=0;i<nb_counters; i++) total_bytes+=counters[i].size;
    if ( total_bytes != counters[nb_counters].size ) {
      TRACE(2,"rfio","rfio_%s_cleanup: (parent) transfer incomplete.",function);
      serrno = EIO;
      total_bytes = -1;
    }
    TRACE(2,"rfio","rfio_%s_cleanup: (parent) removing shm for counters (id=%d, addr=0x%lx)\n",
          function,shmid,(unsigned long)counters);
    if ( counters != NULL ) (void) shmdt((void *)counters);
    if ( shmid>=0 ) (void) shmctl(shmid,IPC_RMID,0);
    shmid = -1;
    counters = NULL;
    if ( sig == SIGINT ) {
      TRACE(2,"rfio","rfio_%s_cleanup: (parent) SIGINT received, doing immediate exit\n",function);
      exit(1);
    }
    /* rfio_setup_handlers() toggles: this call puts the old handlers back */
    (void) rfio_setup_handlers();
    END_TRACE();
    if ( sig ) return(sig<0 ? sig : -sig);
    else return(total_bytes);
  } else {
    /*
     * Child cleanup: revoke handlers (to avoid SIGUSR1 during exit()) and exit
     */
    (void) rfio_setup_handlers();
    TRACE(2,"rfio","rfio_%s_cleanup: child pid %d exiting\n",function,pid);
    exit(sig);
  }
}

/*
 * Toggle signal handling
 */
#define SIGNAL_LIST {SIGINT,SIGQUIT,SIGABRT,SIGBUS,SIGSEGV,SIGPIPE, \
                     SIGALRM,SIGTERM,SIGUSR1,SIGUSR2}
#define SIGNAL_LIST_LEN 10

/*
 * rfio_setup_handlers() - install or remove the RFIO signal handlers.
 *
 * The function keeps a call counter. On odd calls (1st, 3rd, ...) it
 * installs rfio_check_chld() for SIGCHLD and rfio_common_cleanup() for the
 * signals in SIGNAL_LIST, remembering the previous dispositions. On even
 * calls it puts the remembered dispositions back.
 *
 * Slot 0 of action[]/old_action[] is SIGCHLD, slot i (1..SIGNAL_LIST_LEN)
 * is sigs_to_trap[i-1]. Always returns 0.
 */
static int rfio_setup_handlers()
{
  static struct sigaction action[SIGNAL_LIST_LEN+1], old_action[SIGNAL_LIST_LEN+1];
  static int nb_calls = 0;
  sigset_t sigset;
  int sigs_to_trap[SIGNAL_LIST_LEN] = SIGNAL_LIST;
  int i;

  nb_calls++;
  if ( nb_calls % 2 ) {
    TRACE(1, "rfio","rfio_setup_handlers entered to setup handlers") ;
    (void) memset(&action[0],'\0',sizeof(action));
    (void) memset(&old_action[0],'\0',sizeof(old_action));
    (void) sigemptyset(&sigset);             /* Don't block SIGCHLD anywhere */
    /*
     * Block all trapped signals while a handler runs. The loop runs from 1
     * so that sigs_to_trap[i-1] stays inside the array.
     */
    for (i=1;i<=SIGNAL_LIST_LEN;i++) (void) sigaddset(&sigset,sigs_to_trap[i-1]);
    action[0].sa_mask = sigset;
    action[0].sa_handler = (void (*)(int))rfio_check_chld;
    (void) sigaction(SIGCHLD,&action[0],&old_action[0]);
    for (i=1;i<=SIGNAL_LIST_LEN;i++) {
      action[i].sa_mask = sigset;
      action[i].sa_handler = (void (*)(int))rfio_common_cleanup;
    }
  } else {
    TRACE(1, "rfio","rfio_setup_handlers entered to reset handlers") ;
    /* What was saved at setup time becomes what is installed now */
    (void) memcpy(&action[0],&old_action[0],sizeof(action));
    (void) sigaction(SIGCHLD,&action[0],&old_action[0]);
  }
  /* Common to both directions: apply action[i] to every trapped signal */
  for (i=1;i<=SIGNAL_LIST_LEN;i++) (void) sigaction(sigs_to_trap[i-1],&action[i],&old_action[i]);
  return(0);
}

#include <u_signed64.h>
#include <hpss_errno.h>
#include <mvr_protocol.h>
#include <pdata.h>
/*
 * rfio_getdatasock() - open a separate listening socket for the data
 * connection when the configuration names a different network interface.
 *
 *   s         : connected control socket
 *   addr_desc : out, set to NULL or to a malloc()ed descriptor holding the
 *               address, port and family of the new listening socket
 *
 * The short host names of the peer and of the local end of s are looked up
 * with getconfent("NET",name,0). If either lookup gives nothing, or gives a
 * value that the host name itself already starts with, -1 is returned and
 * the caller has no extra socket. Otherwise a socket is bound to the
 * address of the configured local interface on a system chosen port and put
 * in listen state.
 *
 * Returns the listening socket, or -1 (with *addr_desc == NULL) if no
 * socket was opened or on any error.
 */
static int rfio_getdatasock(s,addr_desc)
int s;
initiator_ipaddr_t **addr_desc;
{
   static struct sockaddr_in from,myaddr;
   int fromlen,mylen,newsock;
   struct hostent *fromhp, *myhp;
   char *from_if, *my_if, *p;

   newsock = -1;
   *addr_desc = NULL;
   if ( s<0 ) return(newsock);
   fromlen = sizeof(from);
   if ( getpeername(s,(struct sockaddr *)&from,&fromlen) < 0 ) {
     TRACE(2,"rfio","rfio_getdatasock(%d): getpeername(): %s",s,sys_errlist[errno]);
     return(newsock);
   }
   fromhp = gethostbyaddr((char *)(&(from.sin_addr)), sizeof(struct in_addr), from.sin_family);
   if ( fromhp == NULL ) return(newsock);
   TRACE(2,"rfio","rfio_getdatasock(%d): connection from host %s",s,fromhp->h_name);
   /* Cut the domain part: the configuration is looked up by short name */
   if ( ( p = strstr(fromhp->h_name,".")) != NULL ) *p = '\0';
   from_if = getconfent("NET",fromhp->h_name,0);
   if ( from_if == NULL || !strncmp(fromhp->h_name,from_if,strlen(from_if)) ) {
     TRACE(2,"rfio","rfio_getdatasock(%d): from_if = %s",s,(from_if!=NULL ? from_if : "nil") );
     return(newsock);
   }
   mylen = sizeof(myaddr);
   if ( getsockname(s,(struct sockaddr *)&myaddr,&mylen) < 0 ) {
     TRACE(2,"rfio","rfio_getdatasock(%d): getsockname(): %s",s,sys_errlist[errno]);
     return(newsock);
   }
   myhp = gethostbyaddr((char *)(&(myaddr.sin_addr)), sizeof(struct in_addr), myaddr.sin_family);
   if ( myhp == NULL ) {
     TRACE(2,"rfio","rfio_getdatasock(%d): gethostbyaddr(): %s",s,sys_errlist[errno]);
     return(newsock);
   }
   TRACE(2,"rfio","rfio_getdatasock(%d): local interface is %s",s,myhp->h_name);
   if ( ( p = strstr(myhp->h_name,".")) != NULL ) *p = '\0';
   my_if = getconfent("NET",myhp->h_name,0);
   if ( my_if == NULL || !strncmp(myhp->h_name,my_if,strlen(my_if)) ) {
     TRACE(2,"rfio","rfio_getdatasock(%d): my_if = %s",s,(my_if!=NULL ? my_if : "nil"));
     return(newsock);
   }
   /*
    * we are here because neither local or remote IFs are optimal. Open a
    * socket on our optimal interface and hope that remote can connect to it.
    */
   myhp = gethostbyname(my_if);
   if ( myhp == NULL ) {
     TRACE(2,"rfio","rfio_getdatasock(%d): gethostbyname(%s): %s",s,my_if,sys_errlist[errno]);
     return(newsock);
   }
   newsock = socket(AF_INET,SOCK_STREAM,0);
   if ( newsock < 0 ) {
     TRACE(2,"rfio","rfio_getdatasock(%d): socket(): %s",s,sys_errlist[errno]);
     return(-1);
   }
   myaddr.sin_family = AF_INET;
   myaddr.sin_addr.s_addr = ((struct in_addr *)(myhp->h_addr))->s_addr;
   myaddr.sin_port = 0;
   if ( bind(newsock,(struct sockaddr *)&myaddr,sizeof(struct sockaddr_in)) == -1) {
     TRACE(2,"rfio","rfio_getdatasock(%d), cannot bind(%d): %s",s,newsock,sys_errlist[errno]);
     close(newsock);
     return(-1);
   }
   if ( listen(newsock,SOMAXCONN) == -1 ) {
     TRACE(2,"rfio","rfio_getdatasock(%d), cannot listen(%d): %s",s,newsock,sys_errlist[errno]);
     close(newsock);
     return(-1);
   }
   /* Read back the port the system picked */
   mylen = sizeof(myaddr);
   if ( getsockname(newsock,(struct sockaddr *)&myaddr,&mylen) == -1 ) {
     TRACE(2,"rfio","rfio_getdatasock(%d), getsockname(): %s",s,sys_errlist[errno]);
     close(newsock);
     return(-1);
   }
   TRACE(2,"rfio","rfio_getdatasock(%d): bound port %d to address %s",
         s,ntohs(myaddr.sin_port),(char *)inet_ntoa(myaddr.sin_addr));
   *addr_desc = (initiator_ipaddr_t *)malloc(sizeof(initiator_ipaddr_t));
   if ( *addr_desc == NULL ) {
     /* Without a descriptor the socket is useless: report "no data socket" */
     TRACE(2,"rfio","rfio_getdatasock(%d): malloc(): %s",s,sys_errlist[errno]);
     close(newsock);
     return(-1);
   }
   memset(*addr_desc,'\0',sizeof(initiator_ipaddr_t));
   (*addr_desc)->IpAddr.SockAddr.addr = ntohl(myaddr.sin_addr.s_addr);
   (*addr_desc)->IpAddr.SockAddr.port = ntohs(myaddr.sin_port);
   (*addr_desc)->IpAddr.SockAddr.family = myaddr.sin_family;
   (void)rfio_setsockopts(newsock);
   return(newsock);
}

static int rfio_transfer_data(s,ns,reqid,worker,buffer,buflen,rw)
int s,ns,buflen;
enum readwrite rw;
char *buffer;
/* 
 * worker() is a user supplied routine which takes four arguments:
 * int     fd: remote file descriptor
 * char  *buf: data buffer
 * int buflen: length of databuffer
 * int offset: offset in file for this datasegment
 */
int (*worker)(int, char *, int, int);
{
  int status,bytesReceived,bytesToSend;
  initiator_msg_t initMessage,initReply;
  completion_msg_t completionMessage;
  int BufferSize,rc,offset,i;
  pid_t pid;
  struct sockaddr_in from;
  int fromlen, data_listensock, data_sock, cntl_sock;
  initiator_ipaddr_t *dataaddr_desc;

  cntl_sock = ns;
  data_sock = data_listensock = -1;
  dataaddr_desc = (initiator_ipaddr_t *)NULL;
  TRACE(2,"rfio","rfio_transfer_data(%d,%d,%d) entered",s,ns,reqid);
  pid = getpid();
  data_listensock = rfio_getdatasock(cntl_sock,&dataaddr_desc);
  TRACE(2,"rfio","rfio_transfer_data (pid %d): use data listen socket %d",pid,data_listensock);
  BufferSize = buflen;

  counters[ns].pid = pid;
  for (;;) {
    (void) memset(&initMessage,'\0',sizeof(initMessage));
    (void) memset(&initReply,'\0',sizeof(initReply));
    status = 0;
    for (i=0; i<=nb_counters; i++) status+=counters[i].status;
    if ( status ) {
      TRACE(2,"rfio","rfio_transfer_data (pid %d): end transfer due to total status %d",
	    pid,status);
      break;
    }
    status = mvrprot_recv_initmsg(cntl_sock,&initMessage);
    if (status != HPSS_E_NOERROR) {
      if ( status == HPSS_ECONN ) break;
      TRACE(2,"rfio","rfio_transfer_data (pid=%d): mvrprot_recv_initmsg returns %d",pid,status);
      counters[ns].status = status;
      break;
    }
    if ( !counters[ns].t_start ) (void) time(&counters[ns].t_start);

    if ( data_listensock >= 0 )
      initReply.Flags = MVRPROT_COMP_REPLY | MVRPROT_ADDR_FOLLOWS | MVRPROT_HOLD_RESOURCES;
    else
      initReply.Flags = MVRPROT_COMP_REPLY;

    initReply.Type = initMessage.Type;
    initReply.Offset = initMessage.Offset;

    if ( gt64m(initMessage.Length,cast64m(BufferSize)))
      initReply.Length = cast64m(BufferSize);
    else
      initReply.Length = initMessage.Length;

    status = mvrprot_send_initmsg(cntl_sock,&initReply);
    if ( status != HPSS_E_NOERROR ) {
      TRACE(2,"rfio","rfio_transfer_data (pid=%d): mvrprot_send_initmsg returned %d",pid,status);
      counters[ns].status = status;
      break;
    }
    if ( initMessage.Type != NET_ADDRESS ) {
      TRACE(2,"rfio","message type is not NET_ADDRESS, (type = %d)",initMessage.Type);
      continue;
    } else {
      if ( data_listensock >= 0 ) {
	dataaddr_desc->IpAddr.SockTransferID = cast64m(reqid);
	status = mvrprot_send_ipaddr(cntl_sock,dataaddr_desc);
	if ( status != HPSS_E_NOERROR ) {
	  TRACE(2,"rfio","rfio_transfer_data (pid=%d): mvrprot_send_ipaddr returned %d",pid,status);
	  counters[ns].status = status;
	  break;
	}
	if ( data_sock < 0 ) {
	  fromlen = sizeof(from);
	  data_sock = accept(data_listensock,(struct sockaddr *)&from,&fromlen);
	  if ( data_sock < 0 ) {
	    TRACE(2,"rfio","rfio_transfer_data (pid=%d): accept(%d): %s\n",
		  pid,data_listensock,sys_errlist[errno]);
	    shutdown(data_listensock,2);
	    close(data_listensock);
	    data_listensock = -1;
	    data_sock = cntl_sock;
	  }
	  if ( getpeername(data_sock,(struct sockaddr *)&from,&fromlen) < 0 ) {
	    TRACE(2,"rfio","rfio_transfer_data (pid=%d): getpeername(): %s",
		  pid,sys_errlist[errno]);
	  }
	  TRACE(2,"rfio","rfio_transfer_data (pid=%d): %s using data socket %d",
		pid,(char *)inet_ntoa(from.sin_addr),data_sock);
	  rfio_setbuffs(data_sock,rw);
          rfio_setsockopts(data_sock);
	}
      }
      if ( data_sock < 0 ) {
	data_sock = cntl_sock;
        rfio_setbuffs(data_sock,rw);
        rfio_setsockopts(data_sock);
      }
      bytesReceived = 0;
      bytesToSend = cast32m(initMessage.Length);
      if ( rw == readlist ) {
	/*
	 * hpss_ReadList
	 */
	status = mover_socket_recv_data(data_sock,cast64m(reqid),
					initMessage.Offset,
					buffer,
					low32m(initReply.Length),
					&bytesReceived,1);
	if ( status <= 0 ) {
	  TRACE(2,"rfio","rfio_transfer_data (pid=%d): mover_socket_recv_data() returned %d",
		pid,status);
	  counters[ns].status = status;
	  break;
	}
	offset = cast32m(initMessage.Offset);
	rc = worker(s,buffer,bytesReceived,offset);
	if ( rc != bytesReceived ) {
	  TRACE(2,"rfio","rfio_transfer_data (pid=%d): worker returns %d, expected %d",pid,rc,
		bytesReceived);
	  counters[ns].status = EIO;
	  break;
	}
	counters[ns].size += bytesReceived;
      } else {
	/*
	 * hpss_WriteList
	 */
	offset = cast32m(initMessage.Offset);
	rc = worker(s,buffer,bytesToSend,offset);
	if ( rc != bytesToSend ) {
	  TRACE(2,"rfio","rfio_transfer_data: worker returns %d, expected %d",rc,
		bytesToSend);
	  counters[ns].status = EIO;
	  break;
	}
	status = mover_socket_send_requested_data(data_sock,cast64m(reqid),
						  initMessage.Offset,
						  buffer,
						  low32m(initReply.Length),
						  &bytesToSend,1);
	if ( status <= 0 ) {
	  TRACE(2,"rfio","rfio_transfer_data (pid=%d): mover_socket_send_requested_data() returned %d",
		pid,status);
	  counters[ns].status = status;
	  break;
	}
	counters[ns].size += bytesToSend;
      }
    }

    status = mvrprot_recv_compmsg(cntl_sock,&completionMessage);
    if ( status != HPSS_E_NOERROR ) {
      TRACE(2,"rfio","rfio_transfer_data (pid=%d): mvprot_recv_compmsg() returns %d",
	    pid,status);
      counters[ns].status = status;
      break;
    }
  } /* end for (;;) */
  (void) time(&counters[ns].t_end);
  if ( data_sock != cntl_sock ) close(data_sock);
  if ( data_listensock >= 0 ) {shutdown(data_listensock,2); close(data_listensock);};
  close(cntl_sock);
  rc = worker(s,NULL,-1,-1);
  return(rfio_common_cleanup(0));
}
#endif /* HPSS */

