/*--------------------------------------------------------------------
 * FILE:
 *     recovery.c
 *
 * NOTE:
 *     This file contains the functions called at the backend, together
 *     with the source node, to perform recovery.
 *     The low-level I/O functions called by these functions are
 *     contained in 'replicate_com.c'.
 *
 *--------------------------------------------------------------------
 */

/*--------------------------------------
 * INTERFACE ROUTINES
 *
 * I/O call:
 *      PGR_recovery_finish_send
 * master module:
 *      PGR_Master_Main(void);
 * recovery module:
 *      PGR_Recovery_Main
 *-------------------------------------
 */
#ifdef USE_REPLICATION

#include "postgres.h"
#include "postgres_fe.h"

#include <stdio.h>
#include <unistd.h>
#include <signal.h>
#include <sys/wait.h>
#include <ctype.h>
#include <time.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <netdb.h>
#include <netinet/in.h>
#include <errno.h>
#include <fcntl.h>
#include <time.h>
#include <sys/param.h>
#include <sys/select.h>
#include <netinet/tcp.h>
#include <arpa/inet.h>
#include <sys/file.h>
#include <dirent.h>

#include "libpq/pqsignal.h"
#include "utils/guc.h"
#include "miscadmin.h"
#include "nodes/nodes.h"
#include "nodes/parsenodes.h"
#include "access/xact.h"
#include "access/xlog.h"
#include "tcop/tcopprot.h"

#include "../interfaces/libpq/libpq-fe.h"
#include "../interfaces/libpq/libpq-int.h"
#include "../interfaces/libpq/fe-auth.h"

#include "replicate.h"

#ifdef WIN32
#include "win32.h"
#else
#ifdef HAVE_NETINET_TCP_H
#include <netinet/tcp.h>
#endif
#include <arpa/inet.h>
#endif

#ifndef HAVE_STRDUP
#include "strdup.h"
#endif
#ifdef HAVE_CRYPT_H
#include <crypt.h>
#endif

#ifdef MULTIBYTE
#include "mb/pg_wchar.h"
#endif

/*
 * Signal masks.  AuthBlockSig is installed with PG_SETMASK() by the master
 * process in this file; the representation depends on whether
 * sigprocmask() is available.
 */
#ifdef HAVE_SIGPROCMASK
sigset_t	UnBlockSig,
			BlockSig,
			AuthBlockSig;

#else
int			UnBlockSig,
			BlockSig,
			AuthBlockSig;
#endif

/* Result codes returned by recovery_loop() to PGR_Recovery_Main(). */
#define RECOVERY_LOOP_END	(0)
#define RECOVERY_LOOP_CONTINUE	(1)
#define RECOVERY_LOOP_FAIL	(2)
/* Name of this host, filled in with gethostname() by the *_Main() entry points. */
char Local_Host_Name[HOSTNAME_MAX_LENGTH];

/* packet I/O */
static int read_packet(int sock,RecoveryPacket * packet);
static int send_recovery_packet(int  sock, RecoveryPacket * packet);
static int send_packet(int * sock, RecoveryPacket * packet );
/* master side */
static void master_loop(int fd);
/* recovery side: control packets */
static int start_recovery_send(int * sock, ReplicateServerInfo * host);
static int stop_recovery_send(int * sock, ReplicateServerInfo * host);
/* recovery side: directory synchronisation */
static int rsync_pg_data(char * src , char * dest);
static int remove_dir(char * dir_name);
static int clear_bkup_dir(char * dir_name);
static int bkup_dir(char * dir_name);
static int restore_dir(char * dir_name);
static int rsync_global_dir(char * src, char * dest);
static int first_recovery(char * src, char * dest, char * dir);
static int second_recovery(char * src, char * dest, char * dir);
static int recovery_rsync(char * src , char * dest, int stage);
static int recovery_loop(int fd);
/* packet helpers */
static void show_recovery_packet(RecoveryPacket * packet);
static int direct_send_packet(int packet_no);
static void set_recovery_packet(RecoveryPacket * packet, int packet_no);
/* table space synchronisation */
static int sync_table_space(char * hostName, uint16_t portNum, char * userName);
static PGresult * get_table_space_location(char * hostName, uint16_t portNum, char * userName);
static int rsync_table_space(char * hostName, char * location);

/* externally visible entry points */
int PGR_recovery_error_send(void);
int PGR_recovery_finish_send(void);
int PGR_Master_Main(void);
int PGR_Recovery_Main(void);

/*
 * read_packet()
 *	Receive exactly one RecoveryPacket from 'sock' into 'packet'.
 *
 *	Returns sizeof(RecoveryPacket) once the whole packet has been read
 *	(after dumping it with show_recovery_packet()), or -1 when the peer
 *	closed the connection before sending anything, or when the retry
 *	budget (PGR_RECV_RETRY_CNT) is exhausted.
 *
 *	EINTR, EAGAIN, ECONNREFUSED and ENOTCONN are retried without
 *	consuming the retry budget.
 */
static int
read_packet(int sock,RecoveryPacket * packet)
{
	int r;
	char * read_ptr;
	int read_size = 0;
	int packet_size = 0;
	int cnt = 0;

	read_ptr = (char*)packet;
	packet_size = sizeof(RecoveryPacket);
	for (;;){
		/*
		 * Ask only for the bytes that are still missing.  Requesting the
		 * full packet size at a non-zero offset would let recv() write
		 * past the end of *packet after a partial read.
		 */
		r = recv(sock,read_ptr + read_size ,packet_size - read_size, MSG_WAITALL);
		if (r < 0){
			if (errno == EINTR)
			{
				continue;
			}
			/*
			 * NOTE(review): usleep() takes microseconds while the constant
			 * is named *_MSEC -- confirm the intended unit.
			 */
#ifdef EAGAIN
			else if (errno == EAGAIN)
			{
				usleep(PGR_RECV_WAIT_MSEC);
				continue;
			}
#endif /* EAGAIN */
#ifdef ECONNREFUSED
			else if (errno == ECONNREFUSED)
			{
				usleep(PGR_RECV_WAIT_MSEC);
				continue;
			}
#endif /* ECONNREFUSED */
#ifdef ENOTCONN
			else if (errno == ENOTCONN)
			{
				usleep(PGR_RECV_WAIT_MSEC);
				continue;
			}
#endif /* ENOTCONN */
			/* any other error: retry a bounded number of times */
			if (cnt < PGR_RECV_RETRY_CNT )
			{
				cnt ++;
				usleep(PGR_RECV_WAIT_MSEC);
				continue;
			}
			return -1;
		}
		if (r > 0)
		{
			read_size += r;
			if (read_size == packet_size)
			{
				show_recovery_packet(packet);
				return read_size;
			}
		}
		/* peer closed the connection before any data arrived */
		if ((r == 0) && (read_size == 0))
		{
			return -1;
		}
		/* short read: wait a little and try to get the remainder */
		if (cnt < PGR_RECV_RETRY_CNT )
		{
			cnt ++;
			usleep(PGR_RECV_WAIT_MSEC);
			continue;
		}
		else
		{
			return -1;
		}
	}
}

/*
 * send_recovery_packet()
 *	Wait (up to PGR_SEND_TIMEOUT seconds) for 'sock' to become writable
 *	and then send the whole RecoveryPacket.
 *
 *	Returns STATUS_OK when every byte has been sent; STATUS_ERROR on an
 *	invalid socket, select() timeout/failure or a send() failure other
 *	than EINTR.
 */
static int
send_recovery_packet(int  sock, RecoveryPacket * packet)
{

	char * send_ptr;
	int send_size= 0;
	int buf_size = 0;
	int s;
	int rtn;
	fd_set	  wmask;
	struct timeval timeout;

	/* FD_SET() on a negative descriptor is undefined */
	if (sock < 0)
	{
		return STATUS_ERROR;
	}

	/*
	 * Wait for the socket to become writable.  An interrupted select()
	 * is restarted with a fresh mask and timeout; any other failure or
	 * a timeout is reported to the caller instead of being treated as
	 * "ready".
	 */
	for (;;)
	{
		timeout.tv_sec = PGR_SEND_TIMEOUT;
		timeout.tv_usec = 0;
		FD_ZERO(&wmask);
		FD_SET(sock,&wmask);
		rtn = select(sock+1, (fd_set *)NULL, &wmask, (fd_set *)NULL, &timeout);
		if ((rtn < 0) && (errno == EINTR))
		{
			continue;
		}
		break;
	}
	if ((rtn <= 0) || !FD_ISSET(sock, &wmask))
	{
		return STATUS_ERROR;
	}

	send_ptr = (char *)packet;
	buf_size = sizeof(RecoveryPacket);
	for (;;)
	{
		s = send(sock,send_ptr + send_size,buf_size - send_size ,0);
		if (s < 0){
			if (errno == EINTR)
			{
				continue;
			}
			return STATUS_ERROR;
		}
		send_size += s;
		if (send_size >= buf_size)
		{
			return STATUS_OK;
		}
	}
}

/*
 * send_packet()
 *	Send 'packet' over '*sock', retrying and failing over as needed.
 *
 *	A failed send is retried up to MAX_RETRY_TIMES times on the same
 *	socket.  After that the socket is closed, the current replication
 *	server is marked DATA_ERR, the next server is looked up, marked
 *	DATA_USE and connected to (updating '*sock'), and sending starts over.
 *
 *	Returns STATUS_OK once the packet has been sent, STATUS_ERROR when
 *	no replication server is available.
 */
static int
send_packet(int * sock, RecoveryPacket * packet )
{
	ReplicateServerInfo * server;
	int attempts = 0;

	server = PGR_get_replicate_server_info();
	if (server == (ReplicateServerInfo*)NULL)
	{
		return STATUS_ERROR;
	}

	for (;;)
	{
		if (send_recovery_packet(*sock,packet) == STATUS_OK)
		{
			return STATUS_OK;
		}

		/* same server, try again */
		if (attempts < MAX_RETRY_TIMES )
		{
			attempts ++;
			continue;
		}

		/* give up on this server and switch to the next one */
		attempts = 0;
		close(*sock);
		PGR_Set_Replication_Server_Status(server,DATA_ERR);

		server = PGR_get_replicate_server_info();
		if (server == (ReplicateServerInfo*)NULL)
		{
			return STATUS_ERROR;
		}
		PGR_Set_Replication_Server_Status(server,DATA_USE);
		PGR_Create_Socket_Connect(sock, server->hostName , server->recoveryPortNumber);
	}
}

/*
 * master_loop()
 *	Accept one connection on the listening socket 'fd' and answer the
 *	recovery requests that arrive on it until the conversation ends.
 *
 *	Handled packets:
 *	  RECOVERY_PGDATA_REQ       -> reply RECOVERY_PGDATA_ANS, enter STATUS_RECOVERY
 *	  RECOVERY_FSYNC_REQ        -> reply RECOVERY_FSYNC_ANS, enter STATUS_RECOVERY, end
 *	  RECOVERY_ERROR_CONNECTION -> reply RECOVERY_ERROR_ANS, back to STATUS_REPLICATE, end
 *	  RECOVERY_ERROR_ANS / RECOVERY_FINISH -> back to STATUS_REPLICATE, end
 *	Any other packet number is ignored.
 *
 *	The loop also ends when the connection can no longer be read
 *	(peer closed it, or read_packet() gave up); previously that
 *	situation made the loop spin forever.
 */
static void
master_loop(int fd)
{

	int count;
	int sock = -1;		/* must be defined before PGR_Close_Sock() sees it */
	int status = STATUS_OK;
	RecoveryPacket packet;
	int r_size = 0;
	bool loop_end = false;

	if (PGCluster_Info == NULL)
	{
		return;
	}
	count = 0;
	while ((status = PGR_Create_Acception(fd,&sock,"",PGCluster_Info->Com_Info.ConfData_Info.Recovery_Port_Number)) != STATUS_OK)
	{
		PGR_Close_Sock(&sock);
		sock = -1;
		if ( count > MAX_RETRY_TIMES)
		{
			return;
		}
		count ++;
	}
	while (!loop_end)
	{
		int	rtn;
		fd_set	  rmask;
		struct timeval timeout;

		timeout.tv_sec = PGR_RECV_TIMEOUT;
		timeout.tv_usec = 0;

		/*
		 * Wait for something to happen.
		 */
		FD_ZERO(&rmask);
		FD_SET(sock,&rmask);
		rtn = select(sock+1, &rmask, (fd_set *)NULL, (fd_set *)NULL, &timeout);
		if (rtn < 0)
		{
			if (errno == EINTR)
			{
				continue;
			}
			/* the socket is unusable; nothing more can be received */
			break;
		}
		if ((rtn == 0) || !FD_ISSET(sock, &rmask))
		{
			/* timeout: keep waiting for the next request */
			continue;
		}

		r_size = read_packet(sock,&packet);
		if (r_size <= 0)
		{
			/*
			 * Connection closed or broken.  Retrying would only make
			 * select() report "readable" again immediately.
			 */
			break;
		}

		switch (ntohs(packet.packet_no))
		{
			case RECOVERY_PGDATA_REQ :
				/*
				 * PGDATA information request:
				 * answer with the master server information
				 */
				memset(&packet,0,sizeof(packet));
				set_recovery_packet(&packet, RECOVERY_PGDATA_ANS) ;
				status = send_packet(&sock,&packet);
				PGR_Set_Cluster_Status(STATUS_RECOVERY);
				break;
			case RECOVERY_FSYNC_REQ :
				/*
				 * answer with the master server information
				 */
				memset(&packet,0,sizeof(packet));
				set_recovery_packet(&packet, RECOVERY_FSYNC_ANS );
				status = send_packet(&sock,&packet);
				PGR_Set_Cluster_Status(STATUS_RECOVERY);
				loop_end = true;
				break;
			case RECOVERY_ERROR_CONNECTION:
				memset(&packet,0,sizeof(packet));
				set_recovery_packet(&packet, RECOVERY_ERROR_ANS );
				status = send_packet(&sock,&packet);
				PGR_Set_Cluster_Status(STATUS_REPLICATE);
				loop_end = true;
				break;
			case RECOVERY_ERROR_ANS:
			case RECOVERY_FINISH:
				PGR_Set_Cluster_Status(STATUS_REPLICATE);
				loop_end = true;
				break;
			default:
				/* unknown request: ignore it and wait for the next one */
				break;
		}
	}
	PGR_Close_Sock(&sock);
}

/*
 * PGR_Master_Main()
 *	Fork the recovery master process.
 *
 *	In the calling process this returns fork()'s result (the child pid,
 *	or a negative value if fork() failed), or -1 when PGCluster_Info is
 *	not set.  In the child it records the local host name, installs
 *	authdie for SIGHUP/SIGTERM/SIGQUIT/SIGALRM, binds the recovery port
 *	and then serves connections through master_loop() forever.  If the
 *	bind fails the child returns its own fork() value (0).
 */
int
PGR_Master_Main(void)
{
	int bind_result;
	int listen_fd = -1;
	int ready;
	int pid;

	if (PGCluster_Info == NULL)
	{
		return -1;
	}

	pid = fork();
	if (pid != 0)
	{
		/* parent, or fork() failure */
		return pid;
	}

	/* ---- child process from here on ---- */
	memset(Local_Host_Name,0,sizeof(Local_Host_Name));
	gethostname(Local_Host_Name,sizeof(Local_Host_Name));

	pqsignal(SIGHUP, authdie);
	pqsignal(SIGTERM, authdie);
	pqsignal(SIGQUIT, authdie);
	pqsignal(SIGALRM, authdie);
	PG_SETMASK(&AuthBlockSig);

	bind_result = PGR_Create_Socket_Bind(&listen_fd, "", PGCluster_Info->Com_Info.ConfData_Info.Recovery_Port_Number);
	if (bind_result != STATUS_OK)
	{
		return pid;
	}

	while (1)
	{
		fd_set	  readable;
		struct timeval wait_time;

		/* wake up at least once a minute */
		wait_time.tv_sec = 60;
		wait_time.tv_usec = 0;

		FD_ZERO(&readable);
		FD_SET(listen_fd,&readable);
		ready = select(listen_fd+1, &readable, (fd_set *)NULL, (fd_set *)NULL, &wait_time);
		if (ready && FD_ISSET(listen_fd, &readable))
		{
			/* a recovery client is knocking */
			master_loop(listen_fd);
		}
	}
	return pid;
}

/*
 * start_recovery_send()
 *	Connect to the recovery port of 'host' (storing the socket in
 *	'*sock') and send a RECOVERY_PREPARE_REQ packet.
 *
 *	Returns STATUS_ERROR when the connection cannot be made, otherwise
 *	the result of send_packet().
 */
static int
start_recovery_send(int * sock, ReplicateServerInfo * host)
{
	RecoveryPacket request;

	if (PGR_Create_Socket_Connect(sock, host->hostName, host->recoveryPortNumber) != STATUS_OK)
	{
		if (Debug_pretty_print)
		{
			elog(DEBUG1,"connection error to replication server");
		}
		return STATUS_ERROR;
	}

	memset(&request,0,sizeof(request));
	set_recovery_packet(&request, RECOVERY_PREPARE_REQ );
	return send_packet(sock,&request);
}

/*
 * stop_recovery_send()
 *	Send a RECOVERY_ERROR_ANS packet over '*sock' to abort the recovery.
 *	'host' is not used.  Returns the result of send_packet().
 */
static int
stop_recovery_send(int * sock, ReplicateServerInfo * host)
{
	RecoveryPacket abort_msg;

	memset(&abort_msg,0,sizeof(abort_msg));
	set_recovery_packet(&abort_msg, RECOVERY_ERROR_ANS );
	return send_packet(sock,&abort_msg);
}

/*
 * direct_send_packet()
 *	Open a fresh connection to the current replication server's recovery
 *	port, send a single packet of type 'packet_no' and close the socket.
 *
 *	Returns STATUS_ERROR when no server is known or the connection fails
 *	(the server is then marked DATA_ERR); otherwise the result of
 *	send_packet().
 */
static int
direct_send_packet(int packet_no)
{
	ReplicateServerInfo * server;
	RecoveryPacket msg;
	int sock = -1;
	int result;

	server = PGR_get_replicate_server_info();
	if (server == NULL)
	{
		return STATUS_ERROR;
	}

	if (PGR_Create_Socket_Connect(&sock, server->hostName, server->recoveryPortNumber) != STATUS_OK)
	{
		PGR_Set_Replication_Server_Status(server,DATA_ERR);
		return STATUS_ERROR;
	}

	memset(&msg,0,sizeof(msg));
	set_recovery_packet(&msg, packet_no );
	result = send_packet(&sock,&msg);

	close(sock);

	return result;
}

/*
 * PGR_recovery_error_send()
 *	Send a RECOVERY_ERROR_ANS packet to the replication server over a
 *	new connection.  Returns the status from direct_send_packet().
 */
int
PGR_recovery_error_send(void)
{
	return direct_send_packet(RECOVERY_ERROR_ANS);
}

/*
 * PGR_recovery_finish_send()
 *	Send a RECOVERY_FINISH packet to the replication server over a
 *	new connection.  Returns the status from direct_send_packet().
 */
int
PGR_recovery_finish_send(void)
{
	return direct_send_packet(RECOVERY_FINISH);
}

/*
 * rsync_pg_data()
 *	Run "rsync -a -r -z --delete -e <RsyncOption> src dest" using the
 *	rsync binary configured in PGCluster_Info and wait for it to finish.
 *
 *	Returns STATUS_OK only when rsync ran and exited with status 0.
 *	STATUS_ERROR is returned for missing arguments, fork()/waitpid()
 *	failure, exec failure, abnormal termination or a non-zero exit code.
 *
 *	The previous implementation polled with WNOHANG, which returned
 *	immediately while rsync was still running, and let a child whose
 *	execv() failed continue executing the caller's code.
 */
static int
rsync_pg_data(char * src, char * dest)
{
	int status = 0;
	char *args[12];
	pid_t pid;

	if ((src == NULL) || (dest == NULL) || (PGCluster_Info == NULL))
	{
		return STATUS_ERROR;
	}

	args[0] = "rsync";
	args[1] = "-a";
	args[2] = "-r";
	args[3] = "-z";
	args[4] = "--delete";
	args[5] = "-e";
	args[6] = PGCluster_Info->RsyncOption;
	args[7] = src;
	args[8] = dest;
	args[9] = NULL;

	pid = fork();
	if (pid < 0)
	{
		return STATUS_ERROR;
	}
	if (pid == 0)
	{
		execv(PGCluster_Info->RsyncPath,args);
		/* only reached when exec failed: never fall back into the caller */
		_exit(127);
	}

	/* parent: block until the child has really terminated */
	for (;;)
	{
		pid_t done = waitpid(pid, &status, 0);

		if (done == pid)
		{
			break;
		}
		if ((done < 0) && (errno != EINTR))
		{
			return STATUS_ERROR;
		}
	}
	if (!WIFEXITED(status) || (WEXITSTATUS(status) != 0))
	{
		return STATUS_ERROR;
	}
	return STATUS_OK;
}

/*
 * remove_dir()
 *	Remove the directory 'dir_name' together with everything below it.
 *
 *	Each entry is first handed to remove(); when that fails the entry is
 *	treated as a non-empty directory and removed recursively.  Entries
 *	whose full path does not fit into the local buffer are skipped: a
 *	truncated path must never be passed to remove().
 *
 *	Returns STATUS_OK when 'dir_name' itself could be removed at the
 *	end, STATUS_ERROR otherwise (including when it cannot be opened).
 */
static int
remove_dir(char * dir_name)
{
	DIR * dp = NULL;
	struct dirent *dirp = NULL;
	char fname[256];
	int len;

	if (dir_name == NULL)
	{
		return STATUS_ERROR;
	}
	if ((dp = opendir(dir_name)) == NULL)
	{
		return STATUS_ERROR;
	}
	while ((dirp = readdir(dp)) != NULL)
	{
		if ((!strcmp(dirp->d_name,".")) ||
			(!strcmp(dirp->d_name,"..")))
		{
			continue;
		}
		len = snprintf(fname,sizeof(fname),"%s/%s",dir_name,dirp->d_name);
		if ((len < 0) || ((size_t)len >= sizeof(fname)))
		{
			/* path too long; the final remove() below will report failure */
			continue;
		}
		if (remove(fname) < 0)
		{
			remove_dir(fname);
		}
	}
	closedir(dp);
	if (remove(dir_name) < 0)
	{
		return STATUS_ERROR;
	}
	return STATUS_OK;
}

static int
clear_bkup_dir(char * dir_name)
{
	char bkp_dir[256];
	pid_t pid = getpid();

	sprintf(bkp_dir,"%s_%d",dir_name,pid);
	return (remove_dir(bkp_dir));
}

/*
 * bkup_dir()
 *	Move 'dir_name' aside to "<dir_name>_<pid of this process>".
 *
 *	Waits (1 second per retry, more than MAX_RETRY_TIMES retries is an
 *	error) for 'dir_name' to exist, then renames it.  When the rename
 *	fails, a stale backup directory is removed and the rename is
 *	retried with the same limit.
 *
 *	Returns STATUS_OK on success, STATUS_ERROR otherwise.
 */
static int
bkup_dir(char * dir_name)
{
	int cnt = 0;
	int len;
	char bkp_dir[256];
	struct stat st;

	if (dir_name == NULL)
	{
		return STATUS_ERROR;
	}
	len = snprintf(bkp_dir,sizeof(bkp_dir),"%s_%d",dir_name,(int)getpid());
	if ((len < 0) || ((size_t)len >= sizeof(bkp_dir)))
	{
		/* never operate on a truncated path */
		return STATUS_ERROR;
	}

	/* wait for the original directory to show up */
	cnt = 0;
	while (stat(dir_name,&st) < 0)
	{
		if (cnt > MAX_RETRY_TIMES )
		{
			return STATUS_ERROR;
		}
		cnt ++;
		sleep(1);
	}
	cnt = 0;
	while (rename(dir_name,bkp_dir) < 0)
	{
		/* a left-over backup may be in the way */
		remove_dir(bkp_dir);
		if (cnt > MAX_RETRY_TIMES )
		{
			return STATUS_ERROR;
		}
		sleep(1);
		cnt ++;
	}
	return STATUS_OK;
}

/*
 * restore_dir()
 *	Undo bkup_dir(): move "<dir_name>_<pid of this process>" back to
 *	'dir_name'.
 *
 *	Waits (1 second per retry, more than MAX_RETRY_TIMES retries is an
 *	error) for the backup to exist, then renames it.  When the rename
 *	fails, whatever currently occupies 'dir_name' is removed and the
 *	rename is retried with the same limit.
 *
 *	Returns STATUS_OK on success, STATUS_ERROR otherwise.
 */
static int
restore_dir(char * dir_name)
{
	int  cnt = 0;
	int  len;
	char bkp_dir[256];
	struct stat st;

	if (dir_name == NULL)
	{
		return STATUS_ERROR;
	}
	len = snprintf(bkp_dir,sizeof(bkp_dir),"%s_%d",dir_name,(int)getpid());
	if ((len < 0) || ((size_t)len >= sizeof(bkp_dir)))
	{
		/* never operate on a truncated path */
		return STATUS_ERROR;
	}

	/* nothing to restore unless the backup exists */
	cnt = 0;
	while (stat(bkp_dir,&st) < 0)
	{
		if (cnt > MAX_RETRY_TIMES )
		{
			return STATUS_ERROR;
		}
		cnt ++;
		sleep(1);
	}
	cnt = 0;
	while (rename(bkp_dir,dir_name) < 0)
	{
		/* make room for the backup */
		remove_dir(dir_name);
		if (cnt > MAX_RETRY_TIMES )
		{
			return STATUS_ERROR;
		}
		sleep(1);
		cnt ++;
	}
	return STATUS_OK;
}

/*
 * rsync_global_dir()
 *	Replace "<dest>/global" with a copy of "<src>/global".
 *
 *	The existing directory is moved aside with bkup_dir(), the source is
 *	rsync'ed into 'dest', and the result is accepted once
 *	"<dest>/global/pg_control" exists.  On failure the backup is put
 *	back with restore_dir(); on success it is deleted.
 *
 *	Returns STATUS_OK on success, STATUS_ERROR otherwise (including
 *	paths that do not fit into the local buffers).
 */
static int
rsync_global_dir(char * src, char * dest)
{
	int status;
	int len;
	char control_file[256];
	char org_dir[256];
	char src_dir[256];
	struct stat st;
	int cnt = 0;

	if ((src == NULL) || (dest == NULL))
	{
		return STATUS_ERROR;
	}
	len = snprintf(org_dir,sizeof(org_dir),"%s/global",dest);
	if ((len < 0) || ((size_t)len >= sizeof(org_dir)))
	{
		return STATUS_ERROR;
	}
	len = snprintf(control_file,sizeof(control_file),"%s/global/pg_control",dest);
	if ((len < 0) || ((size_t)len >= sizeof(control_file)))
	{
		return STATUS_ERROR;
	}
	len = snprintf(src_dir,sizeof(src_dir),"%s/global",src);
	if ((len < 0) || ((size_t)len >= sizeof(src_dir)))
	{
		return STATUS_ERROR;
	}

	if (bkup_dir(org_dir) != STATUS_OK)
	{
		return STATUS_ERROR;
	}
	status = rsync_pg_data(src_dir, dest);
	if (status != STATUS_OK )
	{
		restore_dir(org_dir);
		return STATUS_ERROR;
	}
	/* check pg_control file */
	cnt = 0;
	while (stat(control_file,&st) < 0)
	{
		if (cnt > MAX_RETRY_TIMES )
		{
			restore_dir(org_dir);
			return STATUS_ERROR;
		}
		cnt ++;
		sleep(1);
	}
	clear_bkup_dir(org_dir);
	return STATUS_OK;
}

/*
 * first_recovery()
 *	1st recovery step for one sub directory: move "<dest>/<dir>" aside
 *	with bkup_dir(), rsync "<src>/<dir>" into 'dest' and wait until
 *	"<dest>/<dir>" exists.  The backup is kept (it is not cleared here)
 *	and is put back with restore_dir() when the step fails.
 *
 *	Returns STATUS_OK on success, STATUS_ERROR otherwise (including
 *	paths that do not fit into the local buffers).
 */
static int
first_recovery(char * src, char * dest, char * dir)
{
	int status = STATUS_OK;
	int len;
	char src_dir[256];
	char dest_dir[256];
	struct stat st;
	int cnt = 0;

	if ((src == NULL) || (dest == NULL) || (dir == NULL))
	{
		return STATUS_ERROR;
	}
	len = snprintf(src_dir,sizeof(src_dir),"%s/%s",src,dir);
	if ((len < 0) || ((size_t)len >= sizeof(src_dir)))
	{
		return STATUS_ERROR;
	}
	len = snprintf(dest_dir,sizeof(dest_dir),"%s/%s",dest,dir);
	if ((len < 0) || ((size_t)len >= sizeof(dest_dir)))
	{
		return STATUS_ERROR;
	}

	status = bkup_dir(dest_dir);
	if (status != STATUS_OK)
	{
		return STATUS_ERROR;
	}
	status = rsync_pg_data(src_dir, dest);
	if (status != STATUS_OK )
	{
		restore_dir(dest_dir);
		return STATUS_ERROR;
	}
	/* check that the destination directory has been created */
	cnt = 0;
	while (stat(dest_dir,&st) < 0)
	{
		if (cnt > MAX_RETRY_TIMES )
		{
			restore_dir(dest_dir);
			return STATUS_ERROR;
		}
		cnt ++;
		sleep(1);
	}
	return STATUS_OK;
}

/*
 * second_recovery()
 *	2nd recovery step for one sub directory: rsync "<src>/<dir>" into
 *	'dest' again, verify that "<dest>/<dir>" exists and only then delete
 *	the backup that the 1st step left behind.
 *
 *	The backup used to be deleted before the verification, so the
 *	restore_dir() on the failure path had nothing left to restore.
 *
 *	Returns STATUS_OK on success, STATUS_ERROR otherwise (including
 *	paths that do not fit into the local buffers).
 */
static int
second_recovery(char * src, char * dest, char * dir)
{
	int status = STATUS_OK;
	int len;
	char src_dir[256];
	char dest_dir[256];
	struct stat st;
	int cnt = 0;

	if ((src == NULL) || (dest == NULL) || (dir == NULL))
	{
		return STATUS_ERROR;
	}
	len = snprintf(src_dir,sizeof(src_dir),"%s/%s",src,dir);
	if ((len < 0) || ((size_t)len >= sizeof(src_dir)))
	{
		return STATUS_ERROR;
	}
	len = snprintf(dest_dir,sizeof(dest_dir),"%s/%s",dest,dir);
	if ((len < 0) || ((size_t)len >= sizeof(dest_dir)))
	{
		return STATUS_ERROR;
	}

	status = rsync_pg_data(src_dir, dest);
	if (status != STATUS_OK )
	{
		restore_dir(dest_dir);
		return STATUS_ERROR;
	}

	/*
	 * Check that the destination directory exists.  As before, the first
	 * check happens only after an initial one second pause.
	 */
	cnt = 0;
	do {
		if (cnt > MAX_RETRY_TIMES )
		{
			restore_dir(dest_dir);
			return STATUS_ERROR;
		}
		cnt ++;
		sleep(1);
	} while (stat(dest_dir,&st) < 0);

	/* the new copy is in place; the backup is no longer needed */
	clear_bkup_dir(dest_dir);
	return STATUS_OK;
}

static int
recovery_rsync(char * src , char * dest, int stage)
{
	if ((src== NULL) || ( dest == NULL))
	{
		return STATUS_ERROR;
	}

	fprintf(stderr,"%s recovery step of [global] directory...",
			((stage == 1)?"1st":"2nd"));
	/* recovery step of "global" directory */
	if (rsync_global_dir(src, dest) != STATUS_OK)
	{
		fprintf(stderr,"NG\n");
		return STATUS_ERROR;
	}
	fprintf(stderr,"OK\n");

	if (stage == PGR_1ST_RECOVERY)
	{
		fprintf(stderr,"1st recovery step of [base] directory...");
		/* 1st recovery step of "base" directory */
		if (first_recovery(src,dest,"base") != STATUS_OK)
		{
			fprintf(stderr,"NG\n");
			return STATUS_ERROR;
		}
		fprintf(stderr,"OK\n");

		fprintf(stderr,"1st recovery step of [pg_clog] directory...");
		/* 1st recovery step of "pg_clog" directory */
		if (first_recovery(src,dest,"pg_clog") != STATUS_OK)
		{
			fprintf(stderr,"NG\n");
			return STATUS_ERROR;
		}
		fprintf(stderr,"OK\n");

		fprintf(stderr,"1st recovery step of [pg_xlog] directory...");
		/* 1st recovery step of "pg_xlog" directory */
		if (first_recovery(src,dest,"pg_xlog") != STATUS_OK)
		{
			fprintf(stderr,"NG\n");
			return STATUS_ERROR;
		}
		fprintf(stderr,"OK\n");

		fprintf(stderr,"1st recovery step of [pg_subtrans] directory...");
		/* 1st recovery step of "pg_subtrans" directory */
		if (first_recovery(src,dest,"pg_subtrans") != STATUS_OK)
		{
			fprintf(stderr,"NG\n");
			return STATUS_ERROR;
		}
		fprintf(stderr,"OK\n");

		fprintf(stderr,"1st recovery step of [pg_tblspc] directory...");
		/* 1st recovery step of "pg_tblspc" directory */
		if (first_recovery(src,dest,"pg_tblspc") != STATUS_OK)
		{
			fprintf(stderr,"NG\n");
			return STATUS_ERROR;
		}
		fprintf(stderr,"OK\n");
	}
	else
	{
		fprintf(stderr,"2ndt recovery step of [base] directory...");
		/* 2nd recovery step of "base" directory */
		if (second_recovery(src,dest,"base") != STATUS_OK)
		{
			fprintf(stderr,"NG\n");
			return STATUS_ERROR;
		}
		fprintf(stderr,"OK\n");

		fprintf(stderr,"2nd recovery step of [pg_clog] directory...");
		/* 2nd recovery step of "pg_clog" directory */
		if (second_recovery(src,dest,"pg_clog") != STATUS_OK)
		{
			fprintf(stderr,"NG\n");
			return STATUS_ERROR;
		}
		fprintf(stderr,"OK\n");

		fprintf(stderr,"2nd recovery step of [pg_xlog] directory...");
		/* 2nd recovery step of "pg_xlog" directory */
		if (second_recovery(src,dest,"pg_xlog") != STATUS_OK)
		{
			fprintf(stderr,"NG\n");
			return STATUS_ERROR;
		}
		fprintf(stderr,"OK\n");

		fprintf(stderr,"2nd recovery step of [pg_subtrans] directory...");
		/* 2nd recovery step of "pg_subtrans" directory */
		if (second_recovery(src,dest,"pg_subtrans") != STATUS_OK)
		{
			fprintf(stderr,"NG\n");
			return STATUS_ERROR;
		}
		fprintf(stderr,"OK\n");

		fprintf(stderr,"2nd recovery step of [pg_tblspc] directory...");
		/* 2nd recovery step of "pg_tblspc" directory */
		if (second_recovery(src,dest,"pg_tblspc") != STATUS_OK)
		{
			fprintf(stderr,"NG\n");
			return STATUS_ERROR;
		}
		fprintf(stderr,"OK\n");
	}

	return STATUS_OK;
}

static int
recovery_loop(int fd)
{

	int status = STATUS_OK;
	RecoveryPacket packet;
	int r_size = 0;
	int rtn = RECOVERY_LOOP_END;
	char src[256];
	bool need_sync_table_space = false;

	r_size = read_packet(fd,&packet);
	if (r_size <= 0)
	{
		rtn = RECOVERY_LOOP_FAIL;
	}
	switch (ntohs(packet.packet_no))
	{
		case RECOVERY_PREPARE_ANS :
			/*
			 * get master information
			 */
			/*
			 * rsync master data before recovery
			 */
			if (Debug_pretty_print)
			{
				elog(DEBUG1,"local host : %s  master:%s",Local_Host_Name,packet.hostName);
			}
			if (!strncmp(Local_Host_Name,packet.hostName,strlen(packet.hostName)))
			{
				strcpy(src,packet.pg_data);
				need_sync_table_space = false;
			}
			else
			{
				sprintf(src,"%s:%s",packet.hostName,packet.pg_data);
				need_sync_table_space = true;
			}
			status = recovery_rsync(src,DataDir,PGR_1ST_RECOVERY);
			if (status != STATUS_OK)
			{
				if (Debug_pretty_print)
				{
					elog(DEBUG1,"1st rsync error");
				}
				rtn = RECOVERY_LOOP_FAIL;
				break;
			}
			if (need_sync_table_space == true)
			{
				fprintf(stderr,"1st sync_table_space ");
				status = sync_table_space(packet.hostName, ntohs(packet.port), packet.userName);
				if (status != STATUS_OK)
				{
					if (Debug_pretty_print)
					{
						elog(DEBUG1,"1st sync table space error");
					}
					rtn = RECOVERY_LOOP_FAIL;
					fprintf(stderr,"NG\n");
					break;
				}
				fprintf(stderr,"OK\n");
			}
			/*
			 * send recovery start request
			 */
			PGRset_recovery_packet_no(&packet, RECOVERY_START_REQ );
			status = send_packet(&fd,&packet);
			if (status != STATUS_OK)
			{
				rtn = RECOVERY_LOOP_FAIL;
				break;
			}
			rtn = RECOVERY_LOOP_CONTINUE;
			break;
		case RECOVERY_START_ANS : 
			/*
			 * rsync master data before recovery
			 */
			if (!strncmp(Local_Host_Name,packet.hostName,strlen(packet.hostName)))
			{
				strcpy(src,packet.pg_data);
				need_sync_table_space = false;
			}
			else
			{
				sprintf(src,"%s:%s",packet.hostName,packet.pg_data);
				need_sync_table_space = true;
			}
			status = recovery_rsync(src,DataDir,PGR_2ND_RECOVERY);
			if (status != STATUS_OK)
			{
				if (Debug_pretty_print)
				{
					elog(DEBUG1,"2nd rsync error");
				}
				rtn = RECOVERY_LOOP_FAIL;
				break;
			}
			if (need_sync_table_space == true)
			{
				fprintf(stderr,"2nd sync_table_space ");
				status = sync_table_space(packet.hostName, ntohs(packet.port), packet.userName);
				if (status != STATUS_OK)
				{
					if (Debug_pretty_print)
					{
						elog(DEBUG1,"2nd sync table space error");
					}
					rtn = RECOVERY_LOOP_FAIL;
					fprintf(stderr,"NG\n");
					break;
				}
				fprintf(stderr,"OK\n");
			}
			rtn = RECOVERY_LOOP_END;
			break;
		case RECOVERY_ERROR_OCCUPIED:
		case RECOVERY_ERROR_CONNECTION:
			rtn = RECOVERY_LOOP_FAIL;
			break;
	}

	return rtn;
}

/*
 * PGR_Recovery_Main()
 *	Entry point of the recovery side.
 *
 *	Records the local host name, looks up the replication server, sends
 *	the recovery start request and then processes answers with
 *	recovery_loop() until it reports RECOVERY_LOOP_END (-> STATUS_OK) or
 *	RECOVERY_LOOP_FAIL (-> a stop packet is sent, STATUS_ERROR).
 *	A select() timeout simply waits again.
 *
 *	Returns STATUS_ERROR as well when no replication server is known or
 *	the start request cannot be sent (the server is then marked
 *	DATA_ERR).
 */
int
PGR_Recovery_Main(void)
{
	ReplicateServerInfo * server;
	int sock = -1;
	int result;
	int ready;

	memset(Local_Host_Name,0,sizeof(Local_Host_Name));
	gethostname(Local_Host_Name,sizeof(Local_Host_Name));

	server = PGR_get_replicate_server_info();
	if (server == NULL)
	{
		if (Debug_pretty_print)
		{
			elog(DEBUG1,"not found replication server");
		}
		return STATUS_ERROR;
	}

	result = start_recovery_send(&sock,server);
	if (result != STATUS_OK)
	{
		PGR_Set_Replication_Server_Status(server,DATA_ERR);
		close(sock);
		if (Debug_pretty_print)
		{
			elog(DEBUG1,"start recovery packet send error");
		}
		return STATUS_ERROR;
	}

	for (;;)
	{
		fd_set	  readable;
		struct timeval wait_time;

		wait_time.tv_sec = RECOVERY_TIMEOUT;
		wait_time.tv_usec = 0;

		/* wait for the next answer from the replication server */
		FD_ZERO(&readable);
		FD_SET(sock,&readable);
		ready = select(sock+1, &readable, (fd_set *)NULL, (fd_set *)NULL, &wait_time);
		if (!(ready && FD_ISSET(sock, &readable)))
		{
			continue;
		}

		result = recovery_loop(sock);
		if (result == RECOVERY_LOOP_CONTINUE)
		{
			continue;
		}
		if (result == RECOVERY_LOOP_END)
		{
			close(sock);
			break;
		}
		if (result == RECOVERY_LOOP_FAIL)
		{
			/* tell the other side to stop; the outcome is an error either way */
			stop_recovery_send(&sock,server);
			close(sock);
			return STATUS_ERROR;
		}
	}
	return STATUS_OK;
}

/*
 * show_recovery_packet()
 *	Dump the fields of 'packet' at DEBUG1 level.  Nothing is printed
 *	unless Debug_pretty_print is set.
 */
static void
show_recovery_packet(RecoveryPacket * packet)
{
	if (!Debug_pretty_print)
	{
		return;
	}

	/* numeric fields travel in network byte order */
	elog(DEBUG1,"no = %d",ntohs(packet->packet_no));
	elog(DEBUG1,"max_connect = %d",ntohs(packet->max_connect));
	elog(DEBUG1,"port = %d",ntohs(packet->port));
	elog(DEBUG1,"recoveryPort = %d",ntohs(packet->recoveryPort));

	if (packet->hostName != NULL)
	{
		elog(DEBUG1,"hostName = %s",packet->hostName);
	}
	if (packet->pg_data != NULL)
	{
		elog(DEBUG1,"pg_data = %s",packet->pg_data);
	}
}

/*
 * set_recovery_packet()
 *	Fill 'packet' with the description of this server: packet number,
 *	MaxBackends, PostPortNumber and (when PGCluster_Info is set) the
 *	recovery port, all in network byte order, plus host name, data
 *	directory and user name.  Does nothing when 'packet' is NULL.
 *
 *	The data directory is copied as a string, truncated to fit and
 *	always NUL terminated; the previous memcpy() of the whole field size
 *	read beyond the end of the DataDir string.
 */
static void
set_recovery_packet(RecoveryPacket * packet, int packet_no)
{

	if (packet == NULL)
	{
		return;
	}
	PGRset_recovery_packet_no(packet, packet_no );
	packet->max_connect = htons(MaxBackends);
	packet->port = htons(PostPortNumber);
	if (PGCluster_Info != NULL)
	{
		packet->recoveryPort = htons(PGCluster_Info->Com_Info.ConfData_Info.Recovery_Port_Number);
	}

	gethostname(packet->hostName,sizeof(packet->hostName));
	/* gethostname() need not terminate a truncated name */
	packet->hostName[sizeof(packet->hostName) - 1] = '\0';

	memset(packet->pg_data,0,sizeof(packet->pg_data));
	if (DataDir != NULL)
	{
		/* the last byte stays zero, so the field is always terminated */
		strncpy(packet->pg_data,DataDir,sizeof(packet->pg_data) - 1);
	}

	memset(packet->userName,0,sizeof(packet->userName));
	cuserid(packet->userName);
}

/*
 * sync_table_space()
 *	Ask the master for its table space locations and rsync every
 *	non-empty location to this host, printing one "." per location on
 *	stderr.
 *
 *	All locations are attempted even after a failure.  Returns STATUS_OK
 *	when every rsync_table_space() call succeeded, STATUS_ERROR when the
 *	location list could not be fetched or at least one location failed
 *	(previously such failures were silently dropped).
 */
static int
sync_table_space(char * hostName, uint16_t portNum, char * userName)
{
	PGresult * res = (PGresult *)NULL;
	int i = 0;
	int row_num = 0;
	char * location = NULL;
	int rtn = STATUS_OK;

	res = get_table_space_location(hostName, portNum, userName);
	if (res == (PGresult *)NULL)
	{
		return STATUS_ERROR;
	}
	row_num = PQntuples(res);
	for ( i = 0 ; i < row_num ; i ++)
	{
		location = PQgetvalue(res,i,0);
		if ((location != NULL) && (strlen(location) > 0 ))
		{
			if (rsync_table_space(hostName, location) != STATUS_OK)
			{
				/* remember the failure but keep going */
				rtn = STATUS_ERROR;
			}
			fprintf(stderr,".");
		}
	}
	PQclear(res);

	return rtn;
}

static PGresult *
get_table_space_location(char * hostName, uint16_t portNum, char * userName)
{
	char * func = "get_table_space_location()";
	PGresult * res = (PGresult *)NULL;
	int cnt = 0;
	PGconn * conn = (PGconn *)NULL;
	char port[8];
	char *database = "template1";
	char * query = "select spclocation from pg_tablespace";

	if ( (hostName == NULL) ||
		(portNum <= 0)      ||
		(userName == NULL))
	{
		return (PGresult *)NULL;
	}
	snprintf(port,sizeof(port),"%d", portNum);

	/* create connection to master */
	conn = PQsetdbLogin(hostName, port, NULL, NULL, database, userName, NULL);
	if (conn == NULL)
	{
		return (PGresult *)NULL;
	}
	/* check to see that the backend Connection was successfully made */
	cnt = 0;
	while (PQstatus(conn) == CONNECTION_BAD)
	{
		if (conn != NULL)
		{
			PQfinish(conn);
		}
		if (cnt > MAX_RETRY_TIMES )
		{
			return (PGresult *)NULL;
		}
		conn = PQsetdbLogin(hostName, port, NULL, NULL, database, userName, NULL);
		cnt ++;
	}
	res = PQexec(conn , query);
	if ((res == NULL) ||
		(PQresultStatus(res) != PGRES_TUPLES_OK))
	{
		PQclear(res);
		res = (PGresult *)NULL;
	}
	if (conn != NULL)
	{
		PQfinish(conn);
	}

	return res;
}

/*
 * rsync_table_space()
 *	Copy the table space directory 'location' from 'hostName' to the same
 *	path on this host.
 *
 *	rsync is given "<hostName>:<location>" as source and the parent
 *	directory of 'location' as destination.  The local directory is
 *	moved aside first; it is restored when the rsync fails or the
 *	directory does not appear afterwards, and the backup is deleted on
 *	success.
 *
 *	Returns STATUS_OK on success, STATUS_ERROR otherwise (including
 *	paths that do not fit into the local buffers).
 */
static int
rsync_table_space(char * hostName, char * location)
{
	int status = STATUS_OK;
	int len;
	char src_dir[256];
	char dest_dir[256];
	struct stat st;
	int cnt = 0;
	char * term = NULL;

	if ((hostName == NULL) || (location == NULL))
	{
		return STATUS_ERROR;
	}
	len = snprintf(src_dir,sizeof(src_dir),"%s:%s",hostName,location);
	if ((len < 0) || ((size_t)len >= sizeof(src_dir)))
	{
		return STATUS_ERROR;
	}
	len = snprintf(dest_dir,sizeof(dest_dir),"%s",location);
	if ((len < 0) || ((size_t)len >= sizeof(dest_dir)))
	{
		return STATUS_ERROR;
	}

	/* destination is the parent directory of the table space */
	term = strrchr(dest_dir,'/');
	if (term == dest_dir)
	{
		/* location sits directly below the root directory */
		term[1] = '\0';
	}
	else if (term != NULL)
	{
		*term = '\0';
	}

	/*
	 * The backup result is deliberately not checked, as before.
	 * NOTE(review): presumably the directory may not exist locally yet
	 * -- confirm.
	 */
	(void) bkup_dir(location);
	status = rsync_pg_data(src_dir, dest_dir);
	if (status != STATUS_OK )
	{
		restore_dir(location);
		return STATUS_ERROR;
	}
	/* check that the table space directory has been created */
	cnt = 0;
	while (stat(location,&st) < 0)
	{
		if (cnt > MAX_RETRY_TIMES )
		{
			restore_dir(location);
			return STATUS_ERROR;
		}
		cnt ++;
		sleep(1);
	}
	clear_bkup_dir(location);
	return STATUS_OK;
}
#endif /* USE_REPLICATION */
