#!/usr/bin/perl 
# Converts a GeneWeb GW file into an SQL file
#
# Written by Yann Corno - yann.corno@free.fr (feed-back to the author is welcome!)
# Home web site of this GeneWeb tool: http://www.corno.com/geneweb/ (make sure you check for the latest version there!)
#
#
# Last update: see history below
#
# Usage:
#
#    perl gw2sql.pl [-i infile.gw] [-o outfile.sql] [--nocreate|-n] [--nodata] [--mysql3] [--relax]
#
#    -i yourfile.gw:  name of your GW input file. If omitted, standard input is used.
#    -o yourfile.sql: name of the output SQL file. If omitted, standard output is used.
#    -n, --nocreate:  do not ouput the tables creation instructions.
#    --nodata:        ignore any gw file or standard input. Use this option to only output the table creation instructions. Useful when using sql2gw.
#    --mysql3:        use if you have an UTF-8 encoded file, and that
#                     your SQL database does not support the
#                     ALTER TABLE db DEFAULT CHARACTER SET uft8;
#                     instruction (supported by MySQL only from v. 4.0).
#    --relax:         skip lines that could not be understood (juts issue a warning). 
#                     By default, it fails when an unknown line is encountered.
#          
#
# Example: the following example makes a complete transfer of a GeneWeb database into a 
#          fresh MySQL one (under Unix or similar):
#
#    gwu YourGeneWebBase | perl gw2sql.pl | mysql yourDB
#
# Windows users will have to use files:
#    gwu YourGeneWebBase -o YourGeneWebBase.gw
#    perl gw2sql.pl -i YourGeneWebBase.gw -o YourGeneWebBase.sql
#
# gw2sql converts a GeneWeb GW file into an SQL file that can be read in any SQL database.
# It starts by printing a list of table creation instructions, defining the tables structures.
# (careful: there are DROP TABLES instructions, so previous versions of these tables will be trashed).
# You may want to modify this code to suit your own needs, such as adding indexes in these 
# tables, or changing their prefix (see the $dbprefix variable), etc.
# 
# There are several tables created:
#  - gw_family: It contains the references of the husband and wife, and the GeneWeb family information. 
#               Each family is given a unique ID.
#  - gw_person: This contains the details for each person
#  - gw_family_child: This builds the children to family relationship. It contains the child references,
#               and the ID of the family.
#  - gw_family_witness: works exactly like gw_family_child, but for wedding witnesses
#  - gw_aliases: contains the alias, firstname alias, lastname alias, nickname and title of a person. This
#               is stored in a separate table because there can be several ones for the same person.
#  - gw_relationship: contains the relationship between two persons: God Father, Foster Mother, etc.
#  - gw_globaldata: contains misc. data: global notes of the database, etc.
#  - gw_extpages: extended pages
#
# Important: About referencing a person: Because the GW format does not provide the person GeneWeb ID, :-(
# we have to rely on the uniqueness of the triplet Lastname + Fistname + Number. Fortunately, GeneWeb
# makes a good job at it in its GW format, so it works pretty well. So the database solution was to create
# a primary key in the gw_person table that combines the 3 values together.
#
# About dates: as you might know if you read the GW documentation, the GeneWeb format for dates is
# quite extensive. To address most uses, the dates fields are doubled. The GeneWeb date is put 'as is'
# in a first field. Then, if the format is a simple and complete one (i.e. dd/mm/yyyy), it is copied
# in SQL format in a second field. See the 'birth_date' and 'birth_date_sql' fields as an example. These
# '*_sql' fields are indexed, and will allow easy searches. If the GeneWeb date could not be converted,
# the SQL field contains NULL.
#
# About unknown persons: GeneWeb uses a question mark '?' to indicate an unknown last name or first name.
# To ensure uniqueness of persons, it is assumed that each such person is a different one from the others.
# Therefore, the last name and/or first name are given a unique number after the '?'. So you will end up
# with gw_person records that have last names like '?1234', etc.
#
# Note: this first version is not intended to go deeply into the GeneWeb data structures (like dates).
# It is merely a translation tool, and the database structure is really not sophisticated. As you will
# see, you end up with hollow files (lots of NULL fields). It is then up to you to use your own SQL
# skills to extract or compile the information in a way that fits your needs. SQL is quite convenient for
# this.
#
#
# !!! TO DO !!!
#	- help/usage option
#	- csrc family is not properly supported: this is a Common Source, meaning all children share this source. This is NOT a family source (unlike src). This is the same as "cbp"
#
# This software can be used under the standard GPL licence. Use at your own risks. 
# http://cristal.inria.fr/%7Eddr/GeneWeb/LICENCE.htm
#
# 2005-08-09: started coding
# 2005-08-16: first alpha version
# 2005-08-17: support for Unicode/UTF-8 encoding (and some other encodings)
# 2005-08-20: support for multiple aliases, nicknames, titles, etc. They are now stored in a separate database, gw_aliases
# 2005-08-21: fix for notes that have lines beginning with '-'. Improved '_' substitution
# 2005-08-21: support for relationships (God fathers...)
# 2005-08-22: support for #nsck mf and the #noment tags in families
# 2005-08-23: support for global notes (stored in gw_globaldata). Treat the case of the unknown DoD (we don't support it)
# 2005-08-26: support for person definition in the rel tag (relationships)
# 2005-08-31: support for common birth places: #cbp tag
# 2005-09-09: support for notes-db and page-ext tages (wizard notes and extented pages). Support for comments (start by #)
# 2005-09-09: added --relax option, for non-strict parsing
# 2005-09-10: support for #nsckm and Wizard notes. UTF-8 fix. Notes parsing fix.
# 2005-09-15: bug fix when parsing children with the Firstname[.number] Lastname DoB format
# 2005-09-26: better support for unknown persons ("? ?" or "xxx ?" or "? xxx"). Records are now created in the gw_person table.
# 2006-04-12: added --nodata option
# 2006-04-13: added support for the 'mj' death date (young death)
# 2006-10-30: corrected support for person definitions embedded in a relationship ('rel') tag
# 2006-10-30: added support for #nsck mm and ff - at least, this is not rejected anymore.
# 2009-06-07: limited combined primary keys to 100 characters for each column, to comply with MySQL limit

use Getopt::Long;

#-- Global constants - You may change these to your own taste

$dbprefix = "gw_";	# Tables prefix. This is inserted before the name of all tables generated by gw2sql.

#-- Global variables

$unknownIndex = 0;	# This index is used to create a unique LastName or FirstName when '?' is encountered. See replace_unknown()
$encoding = '';		# Use default encoding for now.
$cur_line = '';		# Current line (used for error messages)
$cur_line_num = 0;	# Current line number (idem)

#-- Functions

sub printArray
{ # debug proc to print an array
	my (@arr) = @_;
	my $idx = 0;
	foreach (@arr) {
		print "[".$idx."] ".$_."\n";
		$idx++;
	}
}

sub trim
{ # trim string (remove all spaces at the beginning and the end)
	my ($str) = @_;
	$str =~ s/^\s+//;
	$str =~ s/\s+$//;
	return $str;
}

sub replaceQuote
{ # Replace single quotes ' by \', so that they are acceptable by SQL
	my ($str) = @_;
	$str =~ s/'/\\'/g;
	return $str;
}

sub replace_
{ # Replace all underscores '_' by spaces ' ', and single quotes by backslashed ones (like replaceQuote())
	my ($str) = @_;
	$str =~ s/([^\\]|^)_/$1 /g;
	$str =~ s/\\_/_/g;				# We must retain '\_' sequences, and turn them into '_'
	$str =~ s/'/\\'/g;
	return $str;
}

sub replace_unknown
{ # Add an index in the name is '?', so that it becomes unique
	my ($str) = @_;
	if ($str eq '?')
	{
		$str .= $unknownIndex;
		$unknownIndex++;
	}
	return $str;
}

sub isDate
{ # Geneweb date format is quite complex:
  # 0) For DoD, and optional letter about the type of death: kmes
  # 1) An optional precision prefix: ~?<>
  # 2) dd/mm/yyyy, yyyy or mm/yyyy
  # 3) eventualy, a |yyyy or a ..yyyy to indicate 'or' or 'between'
  # 4) Calendar suffix: J for Julian, F for French Republican, H for Hebrew, nothing for Gregorian
  # 5) Free text in parenthesis ()
	my ($date) = @_;
	
	if ($date eq 'mj')	# Means "dead joung", and considered as a date...
	{
		return 1;
	}
	elsif ($date =~ /^([kmes])?(\~|\?|<|>)?(\d+)(\/\d+)?(\/\d+)?(?:(\.\.|\|)(\d+))?([JFH]?)(?:\(\S+\))?$/)
	{
		return 1;
	}
	else
	{
		return 0;
	}
}

sub extractDate
{ # See isDate for details about the date format
  # Returns a list as follows: (typeOfDeath, precisionCode, dd, mm, yyyy, intervalCode, yyyy, calendar, textdate)
	my ($date) = @_;
	#              1        2           3    4       5          6        7      8            9
	if ($date =~ /^([kmes])?(\~|\?|<|>)?(\d+)(\/\d+)?(\/\d+)?(?:(\.\.|\|)(\d+))?([JFH]?)(?:\((\S+)\))?$/)
	{
		if (defined($4) && defined($5))
		{ # dd/mm/yyyy
			return ($1, $2, $3, $4, $5, $6, $7, $8, $9);
		}
		elsif (defined($5))
		{ #mm/yyyy
			return ($1, $2, undef, $3, $4, $6, $7, $8, $9);
		}
		else
		{ # yyyy
			return ($1, $2, undef, undef, $3, $6, $7, $8, $9);
		}
	}
	else
	{
		die "Error line $cur_line_num: malformed date: >$date<\n";
	}
}

sub isFullDate
{ # Tell if the date can be used for SQL, i.e. if the date is fully defined like dd/mm/yyyy
	my ($date) = @_;
	if ($date =~ /^([kmes])?(\d+)\/(\d+)\/(\d+)$/)
	{
		return 1;
	}
	else
	{
		return 0;
	}
}

sub extractSQLdate
{ # Make a full date a good looking SQL date: dd/mm/yyyy -> 'yyyy-mm-dd'
	my ($date) = @_;
	if ($date =~ /^(?:[kmes])?(\d+)\/(\d+)\/(\d+)$/)
	{
		return "'$3-$2-$1'";
	}
	else
	{
		die "Error line $cur_line_num: extractSQLdate: Not a Full Date: $date\n";
	}
}

sub isReference
{ # Test is the person passed as a parameter is a reference or contains all the person data
	# A person reference contains only: LastName FirstName[.Number]
	# A person data contains at least the date of birth (DoB), even it is is a zero: [LastName] FirstName[.Number] [more data] DoB
	my ($person) = @_;
	$person = trim($person);
	if ($person =~ /^\?\s\S+$/ || $person =~ /^\S+\s\?$/)
	{
		return 0;
	}
	else
	{
		return $person =~ /^\S+\s\S+$/;
	}
}

sub extractReference
{ # Extract the LastName, FirstName and Number of a Person reference
	my ($person) = @_;
	my $lastname = '';
	my $firstname = '';
	my $number = '0';
	
	$person = trim($person);
	if ($person =~ /^(\S+)\s(\S+)$/)
	{
		$lastname = replace_($1);
		if ($2 =~ /(.*)\.(\d+)/)
		{
			$firstname = replace_($1);
			$number = $2; 
		}
		else
		{
			$firstname = replace_($2);
			$number = '0';
		}
		return ($lastname, $firstname, $number);
	}
	else
	{
		die "Error line $cur_line_num: malformed Person reference: >$person<\n";
	}
}#extractReference

sub processPerson
{ # Process a person and store him in the DB.
  # Return an array with the LastName, FirstName and Number
	# Parameters:
	#	$person: the full string of the person
	#	$gender: 'h' or 'm' for male, 'f' for female, '' for unknown
	#	$default_lastname: default last name (used for child)
	#	$isChild: tell us if $person is in child format or not. In child format, the fist name always comes first, and the last name come AFTER, and is optional
	#	$default_birth_place: default birth place. If not empty, it will be used is no birth place is defined
	my ($person, $gender, $default_lastname, $isChild, $default_birth_place) = @_;

	my	$lastname = '';
	my	$firstname = '';
	my	$number = '0';
	
	my	$str = '';
	my	$sql_fields = "";
	my	$sql_values = "";
	my	$birth_date = '';
	my	$death_date = '';
	my	$has_birthplace = 0;	# We don't have one yet
	
	my	@alias;				# List of aliases
	my	@firstname_alias;	# List of firstname aliases
	my	@lastname_alias;	# List of lastname aliases
	my	@nickname;			# List of qualifiers/nicknames
	my	@title;				# List of titles

	#-- Gender
	if ($gender ne '')
	{
		die "Error line $cur_line_num: bad gender for: $person: $gender\n" if $gender !~ /[hfm]/;
		$sql_fields .= ($sql_fields ne '' ? ", " : "") . "gender";
		$sql_values .= ($sql_values ne '' ? ", " : "") . ($gender eq 'f' ? "'female'" : "'male'");
	}

	#-- Exhaust all possible #... parameters
	
	my $foundSomething = 0;
	do
	{
		$foundSomething = 0;
		
		# Firstname alias: {}
		if ($person =~ /(.*)(?:\s|^)\{(\S+)\}(?:\s|$)(.*)/)
		{
			$person = trim($1 . " " . $3);
			$str = replace_($2);
			push @firstname_alias, $str;
			$foundSomething = 1;
		}
		
		# Lastname alias: #salias
		if ($person =~ /(.*)(?:\s|^)\#salias\s(\S+)(?:\s|$)(.*)/)
		{
			$person = trim($1 . " " . $3);
			$str = replace_($2);
			push @lastname_alias, $str;
			$foundSomething = 1;
		}
		
		# Public name: ()
		if ($person =~ /(.*)(?:\s|^)\((\S+)\)(?:\s|$)(.*)/)
		{
			$person = trim($1 . " " . $3);
			$str = replace_($2);
			$sql_fields .= ($sql_fields ne '' ? ", " : "") . "public_name";
			$sql_values .= ($sql_values ne '' ? ", " : "") . "'$str'";
			$foundSomething = 1;
		}

		# Image: #image
		if ($person =~ /(.*)(?:\s|^)\#image\s(\S+)(?:\s|$)(.*)/)
		{
			$person = trim($1 . " " . $3);
			$sql_fields .= ($sql_fields ne '' ? ", " : "") . "image";
			$sql_values .= ($sql_values ne '' ? ", " : "") . "'$2'";
			$foundSomething = 1;
		}
		
		# Nickname: #nick
		if ($person =~ /(.*)(?:\s|^)\#nick\s(\S+)(?:\s|$)(.*)/)
		{
			$person = trim($1 . " " . $3);
			$str = replace_($2);
			push @nickname, $str;
			$foundSomething = 1;
		}
		
		# Alias: #alias
		if ($person =~ /(.*)(?:\s|^)\#alias\s(\S+)(?:\s|$)(.*)/)
		{
			$person = trim($1 . " " . $3);
			$str = replace_($2);
			push @alias, $str;
			$foundSomething = 1;
		}

		# Titles: []
		if ($person =~ /(.*)(?:\s|^)\[(\S+)\](?:\s|$)(.*)/)
		{
			$person = trim($1 . " " . $3);
			$str = replace_($2);
			push @title, $str;
			$foundSomething = 1;
		}

		# Access: #apubl, #apriv
		if ($person =~ /(.*)(?:\s|^)\#(apubl|apriv)(?:\s|$)(.*)/)
		{
			$person = trim($1 . " " . $3);
			$sql_fields .= ($sql_fields ne '' ? ", " : "") . "access";
			$sql_values .= ($sql_values ne '' ? ", " : "") . "'$2'";
			$foundSomething = 1;
		}

		# Occupation: #occu
		if ($person =~ /(.*)(?:\s|^)\#occu\s(\S+)(?:\s|$)(.*)/)
		{
			$person = trim($1 . " " . $3);
			$str = replace_($2);
			$sql_fields .= ($sql_fields ne '' ? ", " : "") . "occupation";
			$sql_values .= ($sql_values ne '' ? ", " : "") . "'$str'";
			$foundSomething = 1;
		}
		
		# PersonSource: #src
		if ($person =~ /(.*)(?:\s|^)\#src\s(\S+)(?:\s|$)(.*)/)
		{
			$person = trim($1 . " " . $3);
			$str = replace_($2);
			$sql_fields .= ($sql_fields ne '' ? ", " : "") . "source";
			$sql_values .= ($sql_values ne '' ? ", " : "") . "'$str'";
			$foundSomething = 1;
		}
		
		# BirthSource: #bs
		if ($person =~ /(.*)(?:\s|^)\#bs\s(\S+)(?:\s|$)(.*)/)
		{
			$person = trim($1 . " " . $3);
			$str = replace_($2);
			$sql_fields .= ($sql_fields ne '' ? ", " : "") . "birth_source";
			$sql_values .= ($sql_values ne '' ? ", " : "") . "'$str'";
			$foundSomething = 1;
		}
		
		# BirthPlace: #bp
		if ($person =~ /(.*)(?:\s|^)\#bp\s(\S+)(?:\s|$)(.*)/)
		{
			$person = trim($1 . " " . $3);
			$str = replace_($2);
			$sql_fields .= ($sql_fields ne '' ? ", " : "") . "birth_place";
			$sql_values .= ($sql_values ne '' ? ", " : "") . "'$str'";
			$foundSomething = 1;
			$has_birthplace = 1;
		}
		
		# BaptizeDate: !
		if ($person =~ /(.*)(?:\s|^)\!(\S+)(?:\s|$)(.*)/)
		{
			$person = trim($1 . " " . $3);
			$str = $2;
			$sql_fields .= ($sql_fields ne '' ? ", " : "") . "baptize_date";
			$sql_values .= ($sql_values ne '' ? ", " : "") . "'$str'";
			if (isFullDate($str))
			{
				$sql_fields .= ($sql_fields ne '' ? ", " : "") . "baptize_date_sql";
				$sql_values .= ($sql_values ne '' ? ", " : "") . extractSQLdate($str);
			}
			$foundSomething = 1;
		}
		
		# BaptizePlace: #pp
		if ($person =~ /(.*)(?:\s|^)\#pp\s(\S+)(?:\s|$)(.*)/)
		{
			$person = trim($1 . " " . $3);
			$str = replace_($2);
			$sql_fields .= ($sql_fields ne '' ? ", " : "") . "baptize_place";
			$sql_values .= ($sql_values ne '' ? ", " : "") . "'$str'";
			$foundSomething = 1;
		}

		# BaptizeSource: #ps
		if ($person =~ /(.*)(?:\s|^)\#ps\s(\S+)(?:\s|$)(.*)/)
		{
			$person = trim($1 . " " . $3);
			$str = replace_($2);
			$sql_fields .= ($sql_fields ne '' ? ", " : "") . "baptize_source";
			$sql_values .= ($sql_values ne '' ? ", " : "") . "'$str'";
			$foundSomething = 1;
		}
		
		# PlaceOfDeath: #dp
		if ($person =~ /(.*)(?:\s|^)\#dp\s(\S+)(?:\s|$)(.*)/)
		{
			$person = trim($1 . " " . $3);
			$str = replace_($2);
			$sql_fields .= ($sql_fields ne '' ? ", " : "") . "death_place";
			$sql_values .= ($sql_values ne '' ? ", " : "") . "'$str'";
			$foundSomething = 1;
		}

		# DeathSource: #ds
		if ($person =~ /(.*)(?:\s|^)\#ds\s(\S+)(?:\s|$)(.*)/)
		{
			$person = trim($1 . " " . $3);
			$str = replace_($2);
			$sql_fields .= ($sql_fields ne '' ? ", " : "") . "death_source";
			$sql_values .= ($sql_values ne '' ? ", " : "") . "'$str'";
			$foundSomething = 1;
		}
		
		# Burial or Cremation: #buri|#crem [Date]
		if ($person =~ /(.*)(?:\s|^)\#(buri|crem)(?:\s([^\s\#]+))?(?:\s|$)(.*)/)
		{
			$person = trim($1 . " " . $4);
			$sql_fields .= ($sql_fields ne '' ? ", " : "") . "burial";
			$sql_values .= ($sql_values ne '' ? ", " : "") . "'$2'";
			if (defined($3))
			{
				$str = $3;
				$sql_fields .= ($sql_fields ne '' ? ", " : "") . "burial_date";
				$sql_values .= ($sql_values ne '' ? ", " : "") . "'$str'";
				if (isFullDate($str))
				{
					$sql_fields .= ($sql_fields ne '' ? ", " : "") . "burial_date_sql";
					$sql_values .= ($sql_values ne '' ? ", " : "") . extractSQLdate($str);
				}
			}
			$foundSomething = 1;
		}
		
		# BurialPlace: #rp
		if ($person =~ /(.*)(?:\s|^)\#rp\s(\S+)(?:\s|$)(.*)/)
		{
			$person = trim($1 . " " . $3);
			$str = replace_($2);
			$sql_fields .= ($sql_fields ne '' ? ", " : "") . "burial_place";
			$sql_values .= ($sql_values ne '' ? ", " : "") . "'$str'";
			$foundSomething = 1;
		}
		
		# BurialSource: #rs
		if ($person =~ /(.*)(?:\s|^)\#rs\s(\S+)(?:\s|$)(.*)/)
		{
			$person = trim($1 . " " . $3);
			$str = replace_($2);
			$sql_fields .= ($sql_fields ne '' ? ", " : "") . "burial_source";
			$sql_values .= ($sql_values ne '' ? ", " : "") . "'$str'";
			$foundSomething = 1;
		}
	} while ($foundSomething);
	
	
	#-- We are done parsing, check for a default birth place
	if (!$has_birthplace && defined($default_birth_place) && $default_birth_place ne '')
	{
		$sql_fields .= ($sql_fields ne '' ? ", " : "") . "birth_place";
		$sql_values .= ($sql_values ne '' ? ", " : "") . "'$default_birth_place'";
	}
	
	#-- Now, we should have only: 
	#	in child format : Firstname[.number] [Lastname] [DoB [DoD]]
	#	in normal format: Lastname Firstname[.number] [DoB [DoD]]
	
	if ($isChild)
	{
		$person =~ s/\s\?$//; # We do NOT support the "probably dead" feature that uses a '?' as a DoD
		
		if ($person =~ /^(\S+)\s(\S+)\s(\S+)\s(\S+)$/)
		{ # Firstname[.number] Lastname DoB DoD
			#print "processPerson: case 1: >$person<\n";
			$firstname = $1;
			$lastname = replace_($2);
			$birth_date = $3;
			$death_date = $4;
		}
		elsif ($person =~ /^(\S+)\s(\S+)\s(\S+)$/)
		{ # Firstname[.number] Lastname DoB
		  # Firstname[.number] DoB DoD
		  # Since there is ambiguity :-(, we need to test the format of the dates to decide
			$firstname = $1;
			if (isDate($2) && isDate($3))
			{ #	Firstname[.number] DoB DoD
				#print "processPerson: case 2A: >$person<\n";
				$lastname = $default_lastname;
				$birth_date = $2;
				$death_date = $3;
			}
			else
			{ # Firstname[.number] Lastname DoB
				#print "#processPerson: case 2B: >$person<\n";
				$lastname = replace_($2);
				$birth_date = $3;
			}
		}
		elsif ($person =~ /^(\S+)\s(\S+)$/)
		{ # Firstname[.number] DoB
		  # Firstname[.number] Lastname
		  # Here again, there is ambiguity :-(
			if (isDate($2))
			{
				#print "#processPerson: case 3A: >$person<\n";
				$firstname = $1;
				$lastname = $default_lastname;
				$birth_date = $2;
			}
			else
			{
				#print "#processPerson: case 3B: >$person<\n";
				$firstname = $1;
				$lastname = replace_($2);
			}
		}
		elsif ($person =~ /^(\S+)$/)
		{ # Firstname[.number]
			#print "#processPerson: case 4: >$person<\n";
			$firstname = $1;
			$lastname = $default_lastname;
		}
		else
		{
			die "Error line $cur_line_num: malformed child person: >$person<\n";
		}
		
		if ($firstname =~ /^(.*)\.(\d+)$/)
		{
			$firstname = replace_($1);
			$number = $2; 
		}
		else
		{
			$firstname = replace_($firstname);
			$number = '0';
		}
	}
	else
	{
		if ($person =~ /^(\S+)\s(\S+)(?:\s(\S+)(?:\s(\S+))?)?$/)
		{ # Lastname Firstname[.number] [DoB [DoD]]
			$lastname = replace_($1);
			$firstname = $2;
			$birth_date = $3;
			if (defined($4))
			{
				$death_date = $4;
			}
			if ($firstname =~ /^(.*)\.(\d+)$/)
			{
				$firstname = replace_($1);
				$number = $2; 
			}
			else
			{
				$firstname = replace_($firstname);
				$number = '0';
			}
		}
		else
		{
			die "Error line $cur_line_num: malformed person: >$person<\n";
		}
	}

	if (defined($birth_date) && ($birth_date ne ''))
	{
		$sql_fields .= ($sql_fields ne '' ? ", " : "") . "birth_date";
		$sql_values .= ($sql_values ne '' ? ", " : "") . "'$birth_date'";
		if (isFullDate($birth_date))
		{
			$sql_fields .= ($sql_fields ne '' ? ", " : "") . "birth_date_sql";
			$sql_values .= ($sql_values ne '' ? ", " : "") . extractSQLdate($birth_date);
		}
	}
	if (defined($death_date) && ($death_date ne ''))
	{
		$sql_fields .= ($sql_fields ne '' ? ", " : "") . "death_date";
		$sql_values .= ($sql_values ne '' ? ", " : "") . "'$death_date'";
		if (isFullDate($death_date))
		{
			$sql_fields .= ($sql_fields ne '' ? ", " : "") . "death_date_sql";
			$sql_values .= ($sql_values ne '' ? ", " : "") . extractSQLdate($death_date);
		}
	}

	$lastname = replace_unknown($lastname);
	$firstname = replace_unknown($firstname);

	$sql_fields = "lastname, firstname, number" . ($sql_fields ne '' ? ", " : "") . $sql_fields;
	$sql_values = "'$lastname', '$firstname', '$number'" . ($sql_values ne '' ? ", " : "") . $sql_values;

	print FO "INSERT INTO ".$dbprefix."person ($sql_fields) VALUES ($sql_values);\n";
	
	#-- Now we can store the various aliases, titles, etc.
	my $value;
	for $value (@alias)
	{
		print FO "INSERT INTO ".$dbprefix."aliases (lastname, firstname, number, alias_type, alias_value) VALUES ('$lastname', '$firstname', '$number', 'alias', '$value');\n";
	}
	for $value (@firstname_alias)
	{
		print FO "INSERT INTO ".$dbprefix."aliases (lastname, firstname, number, alias_type, alias_value) VALUES ('$lastname', '$firstname', '$number', 'firstname_alias', '$value');\n";
	}
	for $value (@lastname_alias)
	{
		print FO "INSERT INTO ".$dbprefix."aliases (lastname, firstname, number, alias_type, alias_value) VALUES ('$lastname', '$firstname', '$number', 'lastname_alias', '$value');\n";
	}
	for $value (@nickname)
	{
		print FO "INSERT INTO ".$dbprefix."aliases (lastname, firstname, number, alias_type, alias_value) VALUES ('$lastname', '$firstname', '$number', 'nickname', '$value');\n";
	}
	for $value (@title)
	{
		print FO "INSERT INTO ".$dbprefix."aliases (lastname, firstname, number, alias_type, alias_value) VALUES ('$lastname', '$firstname', '$number', 'title', '$value');\n";
	}

	return ($lastname, $firstname, $number);

}#processPerson

#-- Initializations

my $usage =
"gw2sql  [--nocreate] [-i infile.gw] [-o outfile.sql]
	";

my $infile = '-';	# Default: stdin
my $outfile = '-';	# Default: stdout
$nocreate = 0;		# Default: print the table creation instructions
$nodata = 0;		# Default: print the data
$mysql3 = 0;		# Default: we use MySQL 4.0 or better
$relax = 0;			# Default: we stop at the first unknown line encountered (strict parsing)

my %options;
my $result = GetOptions(
						"o=s" => \$outfile,
						"i=s" => \$infile,
						"nocreate|n" => \$nocreate,
						"nodata" => \$nodata,
						"mysql3" => \$mysql3,
						"relax" => \$relax
						);
open (FO, ">".$outfile) || die "Cannot open '$outfile' because of $!\n";

#-- Output the table creation instructions
if (!$nocreate)
{
	print FO "
		DROP TABLE IF EXISTS ".$dbprefix."family;
		CREATE TABLE ".$dbprefix."family (
			id int(10) unsigned NOT NULL auto_increment,
			h_lastname varchar(255) NOT NULL default '',
			h_firstname varchar(255) NOT NULL default '',
			h_number varchar(16) NOT NULL default '',
			w_lastname varchar(255) NOT NULL default '',
			w_firstname varchar(255) NOT NULL default '',
			w_number varchar(16) NOT NULL default '',
			wedding_date varchar(255) default NULL,
			wedding_date_sql date default NULL,
			separated varchar(8) default NULL,
			divorced varchar(8) default NULL,
			divorce_date varchar(255) default NULL,
			divorce_date_sql date default NULL,
			not_married varchar(8) default NULL,
			engaged varchar(8) default NULL,
			wedding_place varchar(255) default NULL,
			wedding_source varchar(255) default NULL,
			comments varchar(255) default NULL,
			source varchar(255) default NULL,
			PRIMARY KEY  (id),
			KEY wedding_date_sql (wedding_date_sql)
			) COMMENT='Family';

		DROP TABLE IF EXISTS ".$dbprefix."person;
		CREATE TABLE ".$dbprefix."person (
			lastname varchar(220) NOT NULL default '',
			firstname varchar(220) NOT NULL default '',
			number int(11) NOT NULL default '0',
			gender enum('male', 'female') default NULL,
			birth_date varchar(255) default NULL,
			birth_date_sql date default NULL,
			birth_place varchar(255) default NULL,
			birth_source varchar(255) default NULL,
			death_date varchar(255) default NULL,
			death_date_sql date default NULL,
			death_place varchar(255) default NULL,
			death_source varchar(255) default NULL,
			baptize_date varchar(255) default NULL,
			baptize_date_sql date default NULL,
			baptize_place varchar(255) default NULL,
			baptize_source varchar(255) default NULL,
			burial varchar(255) default NULL,
			burial_date varchar(255) default NULL,
			burial_date_sql date default NULL,
			burial_place varchar(255) default NULL,
			burial_source varchar(255) default NULL,
			public_name varchar(255) default NULL,
			image varchar(255) default NULL,
			access varchar(16) default NULL,
			occupation varchar(255) default NULL,
			source varchar(255) default NULL,
			notes text,
			
			PRIMARY KEY  (lastname(100),firstname(100),number),
			KEY lastname (lastname),
			KEY birth_date_sql (birth_date_sql),
			KEY death_date_sql (death_date_sql)
			) COMMENT='Person';

		DROP TABLE IF EXISTS ".$dbprefix."family_witness;
		CREATE TABLE ".$dbprefix."family_witness (
			familyID int(11) NOT NULL default '0',
			lastname varchar(255) NOT NULL default '',
			firstname varchar(255) NOT NULL default '',
			number int(11) NOT NULL default '0',
			KEY familyID (familyID)
			) COMMENT='Family Witness';

		DROP TABLE IF EXISTS ".$dbprefix."family_child;
		CREATE TABLE ".$dbprefix."family_child (
			familyID int(11) NOT NULL default '0',
			lastname varchar(255) NOT NULL default '',
			firstname varchar(255) NOT NULL default '',
			number int(11) NOT NULL default '0',
			KEY familyID (familyID)
			) COMMENT='Family Child';
		DROP TABLE IF EXISTS ".$dbprefix."aliases;
		CREATE TABLE ".$dbprefix."aliases (
			lastname varchar(220) NOT NULL default '',
			firstname varchar(220) NOT NULL default '',
			number int(11) NOT NULL default '0',
			alias_type enum('alias','firstname_alias','lastname_alias','nickname','title') NOT NULL default 'alias',
			alias_value varchar(255) default NULL,
			KEY lastfirstnum (lastname(100),firstname(100),number)
			) COMMENT='Aliases and others';
		DROP TABLE IF EXISTS ".$dbprefix."relationship;
		CREATE TABLE ".$dbprefix."relationship (
			lastname varchar(220) NOT NULL default '',
			firstname varchar(220) NOT NULL default '',
			number int(11) NOT NULL default '0',
			rel_type varchar(32) NOT NULL default '',
			rel_lastname varchar(220) NOT NULL default '',
			rel_firstname varchar(220) NOT NULL default '',
			rel_number int(11) NOT NULL default '0',
			KEY lastfirstnum (lastname(100),firstname(100),number)
		   ) COMMENT='Relationships';
		DROP TABLE IF EXISTS ".$dbprefix."globaldata;
		CREATE TABLE ".$dbprefix."globaldata (
			name varchar(255) NOT NULL default '',
			data1 varchar(255) default NULL,
			data2 varchar(255) default NULL,
			data3 text,
			KEY name (name)
		) COMMENT='Global Data';
		DROP TABLE IF EXISTS ".$dbprefix."extpage;
		CREATE TABLE ".$dbprefix."extpage (
			name varchar(255) NOT NULL default '',
			data text,
			KEY name (name)
		) COMMENT='Extended pages';

";
}

if ($nodata)
{ # We are not interested in the input data, let's terminate
	close FO;
	exit;
}

open (FI, $infile) || die "Cannot open '$infile' because of $!\n";

#-- Main Loop ---- Here we perform the main processing
my $father_lastname = '';	# Default last name when processing children
my $notes = '';
my @person;
my @relationee;		# Used to store the current person that relations
my $birth_place_common = '';	# Common birth place for a family (#cbp tag)

while (<FI>)
{
	$cur_line = $_;
	$cur_line_num++;

	if (/^end notes$/)
	{
		if ($current_list eq 'notes')
		{
			print FO "UPDATE ".$dbprefix."person SET notes='$notes' WHERE lastname='$person[0]' AND firstname='$person[1]' AND number='$person[2]';\n";
		}
		elsif ($current_list eq 'gnotes')
		{
				print FO "INSERT INTO ".$dbprefix."globaldata (name, data3) VALUES ('Global Notes', '$notes');\n";
		}
		$current_list = '';
	}
	elsif (/^end page-ext$/)
	{ # End of extended page
		print FO "INSERT INTO ".$dbprefix."extpage (name, data) VALUES ('$page_name', '$notes');\n";
		$current_list = '';
		$page_name = '';
	}
	elsif (/^end wizard-note$/)
	{ # End of Wizard Notes
		print FO "INSERT INTO ".$dbprefix."globaldata (name, data1, data3) VALUES ('wizard-note', '$wizard_name', '$notes');\n";
		$current_list = '';
		$wizard_name = '';
	}
	elsif (/^end notes-db$/)
	{ # End of Notes database
		print FO "INSERT INTO ".$dbprefix."globaldata (name, data3) VALUES ('notes-db', '$notes');\n";
		$current_list = '';
	}
	elsif (/^beg$/)
	{ # Start of family or relation children or note: skip it
	}
	elsif ($current_list eq 'notes' || $current_list eq 'gnotes' || $current_list eq 'notes-db'  || $current_list eq 'wizard-note' || $current_list eq 'page-ext')
	{
		$notes .= ($notes ne '' ? "\n" : '') . replaceQuote(trim($_));
	}
	elsif (/^fam\s+(.*)\s\+(\S*)\s(.*)/)
	{ # Family
		$current_list = 'fam';

		
		#-- Init default values
		my $str = '';
		my $sql_fields = "";
		my $sql_values = "";
		
		my $husband = '';
		my $wife = '';
		my $wife0 = '';
				
		my $lastname = '';
		my $firstname = '';
		my $number = '0';
		my $h_lastname = '';
		my $h_firstname = '';
		my $h_number = '0';
		my $w_lastname = '';
		my $w_firstname = '';
		my $w_number = '0';
		my $noGenderCheck = 0;
		
		$birth_place_common = '';	# Reset the common birth place
		
		print FO "#------\n";
		$husband = $1;
		if (defined($2) && $2 ne '')
		{
			$str = $2;
			$sql_fields .= ($sql_fields ne '' ? ", " : "") . "wedding_date";
			$sql_values .= ($sql_values ne '' ? ", " : "") . "'$str'";
			if (isFullDate($str))
			{
				$sql_fields .= ($sql_fields ne '' ? ", " : "") . "wedding_date_sql";
				$sql_values .= ($sql_values ne '' ? ", " : "") . extractSQLdate($str);
			}
		}
		$wife = $3;
		$wife0 = $3;
		# Now we need to remove from $wife all the data that is relevant to the family. Whatever remains will be the real wife data.
		# We will construct the SQL statement on the fly
		
		
		#-- Separated: #sep
		if ($wife =~ /(.*)(?:\s|^)\#sep\b(.*)/)
		{
			$wife = trim($1 . " " . $2);
			$sql_fields .= ($sql_fields ne '' ? ", " : "") . "separated";
			$sql_values .= ($sql_values ne '' ? ", " : "") . "1";
		}
		
		#-- Divorce: -
		if ($wife =~ /(.*)(?:\s|^)-(\S*)\s(.*)/)
		{
			$wife = trim($1 . " " . $3);
			$sql_fields .= ($sql_fields ne '' ? ", " : "") . "divorced";
			$sql_values .= ($sql_values ne '' ? ", " : "") . "1";
			if (defined($2) && $2 ne '')
			{
				$str = $2;
				$sql_fields .= ($sql_fields ne '' ? ", " : "") . "divorce_date";
				$sql_values .= ($sql_values ne '' ? ", " : "") . "'$str'";
				if (isFullDate($str))
				{
					$sql_fields .= ($sql_fields ne '' ? ", " : "") . "divorce_date_sql";
					$sql_values .= ($sql_values ne '' ? ", " : "") . extractSQLdate($str);
				}
			}
		}
		
		#-- No gender check: #nsck mf or #nsckm ??
		if ($wife =~ /(.*)(?:\s|^)\#nsckm? (?:mf|fm|mm|ff|\?\?)\s(.*)/)
		{	# ??? we probably need better handling of these parameter. Is it intended for same-gender couples?
			$wife = trim($1 . " " . $2);
			$noGenderCheck = 1;
		}
		
		#-- No gender mention: #noment
		if ($wife =~ /(.*)(?:\s|^)\#noment\s(.*)/)
		{
			$wife = trim($1 . " " . $2);
			# ??? I do not know what I am supposed to do with this parameter. I will assume a "no Gender Check".
			$noGenderCheck = 1;
		}
		
		#-- Not Married: #nm
		if ($wife =~ /(.*)(?:\s|^)\#nm\s(.*)/)
		{
			$wife = trim($1 . " " . $2);
			$sql_fields .= ($sql_fields ne '' ? ", " : "") . "not_married";
			$sql_values .= ($sql_values ne '' ? ", " : "") . "1";
		}
		
		#-- Engaged: #eng
		if ($wife =~ /(.*)(?:\s|^)\#eng\s(.*)/)
		{
			$wife = trim($1 . " " . $2);
			$sql_fields .= ($sql_fields ne '' ? ", " : "") . "engaged";
			$sql_values .= ($sql_values ne '' ? ", " : "") . "1";
		}

		#-- Wedding place: #mp
		if ($wife =~ /(.*)(?:\s|^)\#mp\s(\S+)\s(.*)/)
		{
			$wife = trim($1 . " " . $3);
			$str = replace_($2);
			$sql_fields .= ($sql_fields ne '' ? ", " : "") . "wedding_place";
			$sql_values .= ($sql_values ne '' ? ", " : "") . "'$str'";
		}
		
		#-- Wedding source: #ms
		if ($wife =~ /(.*)(?:\s|^)\#ms\s(\S+)\s(.*)/)
		{
			$wife = trim($1 . " " . $3);
			$str = replace_($2);
			$sql_fields .= ($sql_fields ne '' ? ", " : "") . "wedding_source";
			$sql_values .= ($sql_values ne '' ? ", " : "") . "'$str'";
		}
		
		#-- Extract names of husband and wife
		if (isReference($husband))
		{
			($h_lastname, $h_firstname, $h_number) = extractReference($husband);
		}
		else
		{
			($h_lastname, $h_firstname, $h_number) = processPerson($husband, ($noGenderCheck ? '' : 'h'), '', 0);
		}
		$father_lastname = $h_lastname;	# Keep is as the default last name when processing a child
		
		if (isReference($wife))
		{
			($w_lastname, $w_firstname, $w_number) = extractReference($wife);
		}
		else
		{
			($w_lastname, $w_firstname, $w_number) = processPerson($wife, ($noGenderCheck ? '' : 'f'), '', 0);
		}

		#-- Finish constructing the SQL INSERT instruction
		
		$sql_fields = "h_lastname, h_firstname, h_number, w_lastname, w_firstname, w_number" . ($sql_fields ne '' ? ", " : "") . $sql_fields;
		$sql_values = "'$h_lastname', '$h_firstname', '$h_number', '$w_lastname', '$w_firstname', '$w_number'" . ($sql_values ne '' ? ", " : "") . $sql_values;

		print FO "INSERT INTO ".$dbprefix."family ($sql_fields) VALUES ($sql_values);\n";
		print FO "SELECT \@last_family_id := last_insert_id();\n";
		
	} #family
	elsif (/^c?src\s+(.*)/)
	{ # Family Source
		my $familySource = replace_($1);
		print FO "UPDATE ".$dbprefix."family SET source='$familySource' WHERE id=\@last_family_id;\n";
	}
	elsif (/^comm\s+(.*)/)
	{ # Family comments
		my $familyComments = replaceQuote($1);
		print FO "UPDATE ".$dbprefix."family SET comments='$familyComments' WHERE id=\@last_family_id;\n";
	}
	elsif (/^wit(?: (m|f))?:\s+(.*)/)
	{ # Family witness. They are stored in a separate table
		my $gender_code = defined($1) ? $1 : '';
		if (isReference($2))
		{
			@person = extractReference($2);
		}
		else
		{
			my $gender = ($gender_code eq 'm') ? 'h' : $gender_code;
			@person = processPerson($2, $gender, '', 0);
		}
		print FO "INSERT INTO ".$dbprefix."family_witness (familyID, lastname, firstname, number) VALUES (\@last_family_id, '$person[0]', '$person[1]', '$person[2]');\n";
	}
	elsif (/^rel\s+(.*)$/)
	{ # Relation
		$current_list = 'rel';
		if (isReference($1))
		{
			@relationee = extractReference($1);
		}
		else
		{
			my $pers = $1;
			if ($pers =~ /^(.*)\s+#(h|f|m)$/)
			{
				@relationee = processPerson($1, $2, '', 0);
			}
			else
			{
				@relationee = processPerson($pers, '', '', 0);
			}
		}
	}
	elsif (/^end$/)
	{ # End of family children
		$current_list = '';
	}
	elsif (/^\-\s(.*)$/ && ($current_list eq 'fam' || $current_list eq 'rel'))
	{ # Child or relation
		
		if (($current_list eq 'fam') && ($1 =~ /^(?:(h|f)\s)?(.*)$/))
		{
			$gender = defined($1) ? $1 : '';
			$person = $2;
			($lastname, $firstname, $number) = processPerson($person, $gender, $father_lastname, 1, $birth_place_common);
			print FO "INSERT INTO ".$dbprefix."family_child (familyID, lastname, firstname, number) VALUES (\@last_family_id, '$lastname', '$firstname', '$number');\n";
		}
		elsif (($current_list eq 'rel'))
		{
			if ($1 =~ /^(adop|reco|cand|godp|fost) (fath|moth):\s(.*)$/)
			{
				my $rel_type = $1 . " " . $2;
				$gender = ($2 eq 'fath') ? 'h' : 'f';
				$person = $3;
				if (isReference($person))
				{
					@person = extractReference($person);
				}
				else
				{
					@person = processPerson($person, $gender, '', 0);
				}
				print FO "INSERT INTO ".$dbprefix."relationship (lastname, firstname, number, rel_type, rel_lastname, rel_firstname, rel_number) VALUES ('$relationee[0]', '$relationee[1]', '$relationee[2]', '$rel_type', '$person[0]', '$person[1]', '$person[2]');\n";
			}
			elsif ($1 =~ /^(adop|reco|cand|godp|fost):\s(.*)\s\+\s(.*)$/)
			{ # Two persons: create two records
				my $rel_type = $1;
				my @father;
				my @mother;
				if (isReference($2))
				{
					@father = extractReference($2);
				}
				else
				{
					@father = processPerson($2, 'h', '', 0);
				}
				if (isReference($3))
				{
					@mother = extractReference($3);
				}
				else
				{
					@mother = processPerson($3, 'f', '', 0);
				}
				print FO "INSERT INTO ".$dbprefix."relationship (lastname, firstname, number, rel_type, rel_lastname, rel_firstname, rel_number) VALUES ('$relationee[0]', '$relationee[1]', '$relationee[2]', '$rel_type fath', '$father[0]', '$father[1]', '$father[2]');\n";
				print FO "INSERT INTO ".$dbprefix."relationship (lastname, firstname, number, rel_type, rel_lastname, rel_firstname, rel_number) VALUES ('$relationee[0]', '$relationee[1]', '$relationee[2]', '$rel_type moth', '$mother[0]', '$mother[1]', '$mother[2]');\n";
			}
			else
			{
				die "Error line $cur_line_num: could not process relation: >$cur_line<\n";
			}
		}
	}
	elsif (/^notes\s+(.+)$/)
	{ # Person notes
		$current_list = 'notes';
		@person = extractReference($1);
		$notes = '';
	}
	elsif (/^notes$/)
	{ # Global notes
		$current_list = 'gnotes';
		$notes = '';
	}
	elsif (/^notes-db$/)
	{ # Notes database
		$current_list = 'notes-db';
		$notes = '';
	}
	elsif (/^wizard-note (.*)$/)
	{ # Wizard Notes
		$current_list = 'wizard-note';
		$wizard_name = replaceQuote($1);
		$notes = '';
	}
	elsif (/^page-ext (.*)$/)
	{ # Extended page
		$current_list = 'page-ext';
		$page_name = replaceQuote($1);
		$notes = '';
	}
	elsif (/^cbp (.*)/)
	{ # Common birth place
		$birth_place_common = replace_($1);
	}
	elsif (/^encoding: (.*)$/)
	{ # Change encoding modes for I/O
	  # Warning: you need MySQL 4.0 or better to be able to use these encoding commands
		$encoding = $1;
		my $perl_encoding = $encoding;
		$perl_encoding =~ s/-//g;
		print FO "# Switching to encoding: $encoding\n";
		binmode(FI, ":$perl_encoding");
		binmode(FO, ":$perl_encoding");
		
		# We need to translate the encoding names to something MySQL can understand
		my %sql_sets = (
			"utf-8" => "utf8",
			"iso-8859-1" => "latin1",
			"8859-1" => "latin1",
			"iso-8859-2" => "latin2",
			"8859-2" => "latin2",
			);
		
		if (!$mysql3 && $sql_sets{$encoding} ne '')
		{
			print FO "ALTER TABLE ".$dbprefix."family DEFAULT CHARACTER SET ".$sql_sets{$encoding}.";\n";
			print FO "ALTER TABLE ".$dbprefix."person DEFAULT CHARACTER SET ".$sql_sets{$encoding}.";\n";
			print FO "ALTER TABLE ".$dbprefix."family_witness DEFAULT CHARACTER SET ".$sql_sets{$encoding}.";\n";
			print FO "ALTER TABLE ".$dbprefix."family_child DEFAULT CHARACTER SET ".$sql_sets{$encoding}.";\n";
			print FO "ALTER TABLE ".$dbprefix."aliases DEFAULT CHARACTER SET ".$sql_sets{$encoding}.";\n";
			print FO "ALTER TABLE ".$dbprefix."relationship DEFAULT CHARACTER SET ".$sql_sets{$encoding}.";\n";
			print FO "ALTER TABLE ".$dbprefix."globaldata DEFAULT CHARACTER SET ".$sql_sets{$encoding}.";\n";
		}
	}
	elsif (/^#.*$/)
	{ # Comment: skip it
		# Nothing to do
	}
	elsif (trim($_) ne '')
	{
		$cur_line = trim($cur_line);
		if ($relax)
		{
			print FO "# Warning line $cur_line_num: unknown line: >$cur_line<\n";
		}
		else
		{
			die "Error line $cur_line_num: unknown line: >$cur_line<";
		}
	}
	

}

#-- Clean-up
close FI;
close FO;


