#!/usr/bin/perl -w
#------------------------------------------------------------------------------
# Ce script est une version modifiee de mysql2pgsql afin de:
# - gerer les base mysql innodb
# - traiter tous les fichiers mysql/data/*.sql vers pgsql/data
# - gerer les autoincrement en SERIAL plutot qu'en sequenceurs
# - utiliser le CHECK plutot que des sous-tables pour les types enum
# - corriger de nombreux bugs
#
# Regle ecriture de fichier sql portables
# Pour les cles autoincrement:  rowid integer AUTO_INCREMENT PRIMARY KEY,
# Mettre les index dans fichier.key.sql
#------------------------------------------------------------------------------
## no critic (InputOutput::ProhibitExplicitStdin,InputOutput::RequireBriefOpen)
use Data::Dumper;
use Getopt::Long;
use strict;
use warnings;

use vars qw/ $DIR $PROG $Extension $SOURCE $DESTI %filelist $stop /;

# command line options
my ( $opt_debug, $opt_help );

# general values
my ( $out, $size );

# variables for constructing pre-create-table entities
my $create_sql     = '';    # if empty we are not making a create statement
my $create_index   = '';    # if empty we are not making a create statement
my %enum_datafield = ();    #  holds enumeration choices
my ( @column_values, $enum_column, $seq );
my $table = "";

#------------------------------------------------------------------------------
# MAIN
#------------------------------------------------------------------------------
( $DIR  = $0 ) =~ s/([^\/\\]+)$//;
( $PROG = $1 ) =~ s/\.([^\.]*)$//;
$Extension = $1;
$DIR ||= '.';
$DIR =~ s/([^\/\\])[\\\/]+$/$1/;

$SOURCE = "$DIR/install/mysql/tables";
$DESTI  = "$DIR/install/pgsql/tables";

# Recherche tous les fichiers .sql
opendir( my $dir, $SOURCE );
foreach my $file ( readdir($dir) ) {
	if ( $file =~ /\.sql$/ && -f "$SOURCE/$file" ) {
		print "Found file $file\n";
		$filelist{$file} = 1;
	}
}
closedir($dir);

# Boucle sur tous les fichiers de SOURCE
#---------------------------------------
foreach my $file ( keys %filelist ) {

	local $ARGV[0] = "$SOURCE/$file";
	local $ARGV[1] = "$DESTI/$file";

	print "Convert file $ARGV[0] into $ARGV[1]\n";

   # MySQL to PostgreSQL dump file converter
   #
   # For usage: perl mysql2pgsql.perl --help
   #
   # homepage: http://www.rot13.org/~dpavlin/projects.html
   # 1999-12-15 DbP -- Dobrica Pavlinusic <dpavlin@rot13.org>
   # 1999-12-26 DbP don't make serial from auto_increment, create all manually
   #		 (to set start value right)
   # 2000-01-11 DbP now creates sequences with correct value
   # 2000-04-25 DbP import into CVS (at cvs.linux.hr)
   # 2001-01-29 tpo -- Tomas Pospisek <tpo@sourcepole.ch>:
   #		 1) make script comply to usage:
   #		 2) make script output to STDOUT instead of STERR
   #		 3) change verbosity behaveour
   #		 4) add debug option
   # see rest of changelog at http://cvs.linux.hr/cvsweb.cgi/sql/mysql2pgsql
   # 2003-12-16 jsp -- Joe Speigle <joe.speigle@jklh.us>:
   #		converts: s/\) *Type=MyISAM;/);/i, enum data type -> references,
   #		auto_increment->sequences
   # 2004-01-13 jsp -- moved project to gborg; both the above declined ownership
   # 2004-06-29 converts: year(4), year(2)
   # homepage: gborg.postgresql.org

	GetOptions( "debug", "help" );

	my $DEBUG = $opt_debug || 0;
	my $HELP  = $opt_help  || 0;

	if ( ($HELP) || !defined( $ARGV[0] ) || !defined( $ARGV[1] ) ) {
		print
"Usage: perl $0 {--verbose|--help|--debug} mysql_dump_file.sql pg_dump_file.sql\n";
		print "\t* OPTIONS\n";
		print
		  "\t--verbose tees to pg_dump_file.sql and STDOUT during conversion\n";
		print "\t--debug does ?? \n";
		print "\t--help prints this message \n";
		print "\t* REQUIRED ARGUMENTS\n";
		if ( defined( $ARGV[0] ) ) {
			print "\tmysql_dump_file.sql ($ARGV[0])\n";
		}
		else {
			print "\tmysql_dump_file.sql (undefined)\n";
		}
		if ( defined( $ARGV[1] ) ) {
			print "\tpg_dump_file.sql ($ARGV[1])\n";
		}
		else {
			print "\tpg_dump_file.sql (undefined)\n";
		}
		exit 1;
	}

	open( my $in, "<", "$ARGV[0]" )
	  || die "can't open mysql dump file $ARGV[0]";
	open( my $out, ">", "$ARGV[1]" ) || die "can't open pg dump file $ARGV[1]";
	print $out "-- Generated by $PROG\n";
	print $out "-- (c) 2004, PostgreSQL Inc.\n";
	print $out "-- (c) 2005, Laurent Destailleur.\n";
	print $out "\n";

	# Output for create table and create index
	sub output_create {

		# If command ends with "xxx,);", we change to "xxx);"
		$create_sql =~ s/,(\s*)\);/$1\);/m;

		# If command ends with "xxx, -- yyy );", we change to "xxx -- yyy);"
		$create_sql =~ s/,(\s*\-\-[^\)\n]*)(\s*)\);/$1\n\);/m;

		print $out $create_sql;
		if ($create_index) {
			print $out "\n";
			print $out $create_index;
		}
		return;
	}

	# Reset when moving from each "create table" to "insert" part of dump
	sub reset_vars() {
		$create_sql     = "";
		$create_index   = "";
		%enum_datafield = ();
		$enum_column    = '';
		return;
	}

	# Boucle sur contenu fichier source
	#----------------------------------
	while (<$in>) {

		# comments or empty lines
		if (/^-- \$Id/) {
			$_ =~ s/\$//g;
			print $out $_;
			next;
		}

		# comments or empty lines
		if ( /^#/ || /^$/ || /^--/ ) {
			print $out $_;
			next;
		}
		if (/^USE\s*([^;]*);/) {
			print $out "\\c " . $1;
			next;
		}
		if ( $create_sql ne "" )
		{    # we are inside create table statement so let's process datatypes

			if (/\);/i) {    # end of create table sequence
				$create_sql =~ s/,$//g;    # strip last , inside create table
				&output_create;
				&reset_vars();
				next;

				# LDR Added "innodb" and "engine"
			}
			elsif (/(ISAM|innodb)/i) {    # end of create table sequence
				s/\) *type=(MyISAM|innodb);/);/i;
				s/\) *engine=(MyISAM|innodb);/);/i;
				$create_sql =~ s/,$//g;    # strip last , inside create table
				$create_sql .= $_;
				&output_create;
				&reset_vars();
				next;
			}

			# enum -> check
			if (
/([\w\"]*)\s+enum\s*\(((?:['"][\?\w]+['"]\s*,)+['"][\?\w]+['"])\)(.*)$/i
			  )
			{
				$enum_column = $1;
				$enum_datafield{$enum_column} = $2;    # 'abc','def', ...
				my $suite     = $3;
				my $maxlength = 0;
				foreach my $enum ( split( ',', $enum_datafield{$enum_column} ) )
				{
					$enum =~ s/[\"\']//g;
					if ( $maxlength < length($enum) ) {
						$maxlength = length($enum);
					}
				}
				$enum_datafield{$enum_column} =~ s/\"/\'/g;
				$_ =
qq~  $enum_column CHAR($maxlength) CHECK ($enum_column IN ($enum_datafield{$enum_column})) $suite\n~;

				# int, auto_increment -> serial
			}
			elsif (/^[\s\t]*(\w*)\s*.*int.*auto_increment/i) {
				$seq = qq~${table}_${1}_seq~;
s/[\s\t]*([a-zA-Z_0-9]*)\s*.*int.*auto_increment[^,]*/  $1 SERIAL PRIMARY KEY/ig;
				$create_sql .= $_;
				next;

				# int type conversion
			}
			elsif (/(\w*)int\(\d+\)/i) {
				$size = $1;
				$size =~ tr [A-Z] [a-z];
				if ( $size eq "tiny" || $size eq "small" ) {
					$out = "int2";
				}
				elsif ( $size eq "big" ) {
					$out = "int8";
				}
				else {
					$out = "int4";
				}
				s/\w*int\(\d+\)/$out/g;
			}

			# tinyint -> smallint
			elsif (/tinyint/i) {
				s/tinyint/smallint/g;
			}

			# nuke unsigned
			s/(int\w+|smallint)\s+unsigned/$1/gi;

			# blob -> text
			s/\w*blob/text/gi;

			# tinytext/mediumtext -> text
			s/tinytext/text/gi;
			s/mediumtext/text/gi;

			# char -> varchar
			# PostgreSQL would otherwise pad with spaces as opposed
			# to MySQL! Your user interface may depend on this!
			s/(\s+)char/${1}varchar/gi;

			# nuke date representation (not supported in PostgreSQL)
			s/datetime default '[^']+'/datetime/i;
			s/date default '[^']+'/datetime/i;
			s/time default '[^']+'/datetime/i;

			# change not null datetime field to null valid ones
			# (to support remapping of "zero time" to null
			s/datetime not null/datetime/i;
			s/datetime/timestamp/i;

			# nuke size of timestamp
			s/timestamp\([^)]*\)/timestamp/i;

			# double -> numeric
			s/^double/numeric/i;
			s/(\s*)double/${1}numeric/i;

			# float -> numeric
			s/^float/numeric/i;
			s/(\s*)float/${1}numeric/i;

			# unique key(field1,field2)
			if (/unique key\s*\((\w+\s*,\s*\w+)\)/i) {
				s/unique key\s*\((\w+\s*,\s*\w+)\)/UNIQUE\($1\)/i;
				$create_sql .= $_;
				next;
			}

			# unique index(field1,field2)
			if (/unique index\s*\((\w+\s*,\s*\w+)\)/i) {
				s/unique index\s*\((\w+\s*,\s*\w+)\)/UNIQUE\($1\)/i;
				$create_sql .= $_;
				next;
			}

			# unique key [name] (field)
			if (/unique key\s*(\w*)\s*\((\w+)\)/i) {
				s/unique key\s*(\w*)\s*\((\w+)\)/UNIQUE\($2\)/i;
				my $idxname = ( $1 ? "$1" : "idx_${table}_$2" );
				$create_sql   .= $_;
				$create_index .= "CREATE INDEX $idxname ON $table ($2);\n";
				next;
			}

			# unique index [name] (field)
			if (/unique index\s*(\w*)\s*\((\w+)\)/i) {
				s/unique index\s*(\w*)\s*\((\w+)\)/UNIQUE\($2\)/i;
				my $idxname = ( $1 ? "$1" : "idx_${table}_$2" );
				$create_sql   .= $_;
				$create_index .= "CREATE INDEX $idxname ON $table ($2);\n";
				next;
			}

			# unique (field) et unique (field1, field2 ...)
			if (/unique\s*\(([\w,\s]+)\)/i) {
				s/unique\s*\(([\w,\s]+)\)/UNIQUE\($1\)/i;
				my $fieldlist = "$1";
				my $idxname   = "idx_${table}_${fieldlist}";
				$idxname =~ s/\W/_/g;
				$idxname =~ tr/_/_/s;
				$create_sql .= $_;
				$create_index .=
				  "CREATE INDEX $idxname ON $table ($fieldlist);\n";
				next;
			}

			# index(field)
			if (/index\s*(\w*)\s*\((\w+)\)/i) {
				my $idxname = ( $1 ? "$1" : "idx_${table}_$2" );
				$create_index .= "CREATE INDEX $idxname ON $table ($2);\n";
				next;
			}

			# primary key
			if ( /\bkey\b/i && !/^\s+primary key\s+/i ) {
				s/KEY(\s+)[^(]*(\s+)/$1 UNIQUE $2/i
				  ;    # hack off name of the non-primary key
			}

			# key(xxx)
			if (/key\s*\((\w+)\)/i) {
				my $idxname = "idx_${table}_$1";
				$create_index .= "CREATE INDEX $idxname ON $table ($1);\n";
				next;
			}

			# Quote column names
			s/(^\s*)([^\s\-\(]+)(\s*)/$1"$2"$3/gi if ( !/\bkey\b/i );

			# Remap columns with names of existing system attribute
			if (/"oid"/i) {
				s/"oid"/"_oid"/g;
				print STDERR
"WARNING: table $table uses column \"oid\" which is renamed to \"_oid\"\nYou should fix application manually! Press return to continue.";
				my $wait = <STDIN>;
			}
			s/oid/_oid/i if ( /key/i && /oid/i );    # fix oid in key
			$create_sql .= $_;
		} #  END of if ($create_sql ne "") i.e. were inside create table statement so processed datatypes
		else {    # not inside create table
			      #---- fix data in inserted data: (from MS world)
			      # FIX: disabled for now
			if ( 00 && /insert into/i ) {
				s!\x96!-!g;       # --
				s!\x93!"!g;       # ``
				s!\x94!"!g;       # ''
				s!\x85!... !g;    # \ldots
				s!\x92!`!g;
			}

			# fix dates '0000-00-00 00:00:00' (should be null)
			s/'0000-00-00 00:00:00'/null/gi;
			s/'0000-00-00'/null/gi;
			s/'00:00:00'/null/gi;
s/([12]\d\d\d)([01]\d)([0-3]\d)([0-2]\d)([0-6]\d)([0-6]\d)/'$1-$2-$3 $4:$5:$6'/;

			if (/create\s+table\s+(\w+)/i) {
				$create_sql = $_;
				/create\s*table\s*(\w+)/i;
				$table = $1 if ( defined($1) );
			}
			else {
				print $out $_;
			}
		}    # end of if inside create_table
	}    # END while(<IN>)

	close $in;
	close $out;

}

print "\n";
print "Build " . ( scalar keys %filelist ) . " file(s).\n";
print "\n";
print "Press a key to finish...\n";
$stop = <STDIN>;

0;
