use strict; use DBI; open STDERR,">&STDOUT"; my($cg_db_host,$cg_db_name,$cg_db_user,$cg_db_pass,$cg_db_sheet,$cg_db_table); my($use_password,$cg_db_port); foreach my $argument (0..$#ARGV) { if ($ARGV[$argument]=~/^-[^h\s]*h/) { # -h is HOST $cg_db_host=$ARGV[$argument+1]; } if ($ARGV[$argument]=~/^-[^d\s]*d/) { # -d is DATABASE $cg_db_name=$ARGV[$argument+1]; } if ($ARGV[$argument]=~/^-[^u\s]*u/) { # -u is USER $cg_db_user=$ARGV[$argument+1]; } if ($ARGV[$argument]=~/^-[^p\s]*p/) { # -p is PASSWORD $cg_db_pass=$ARGV[$argument+1]; } if ($ARGV[$argument]=~/^-[^s\s]*s/) { # -s is SHEET $cg_db_sheet=2+$ARGV[$argument+1]; } if ($ARGV[$argument]=~/^-[^P\s]*P/) { # -P is PORT $cg_db_port=$ARGV[$argument+1]; } if ($ARGV[$argument]=~/^-[^t\s]*t/) { # -t is TABLE $cg_db_table=$ARGV[$argument+1]; } } my $file=$ARGV[$#ARGV]; $cg_db_host='' if (!defined($cg_db_host)); $cg_db_name='' if (!defined($cg_db_name)); $cg_db_pass='' if (!defined($cg_db_pass)); $cg_db_port='' if (!defined($cg_db_port)); $cg_db_table='' if (!defined($cg_db_table)); $cg_db_sheet='' if (!defined($cg_db_sheet)); $cg_db_user='' if (!defined($cg_db_user)); $file='' if (!defined($file)); $cg_db_sheet=2 if ($cg_db_sheet eq ''); &print_usage if ($file eq '' || $cg_db_name eq '' || $cg_db_table eq ''); my $dsn; if ($cg_db_host eq '') { $dsn="DBI:mysql:$cg_db_name"; }else{ $dsn="DBI:mysql:database=$cg_db_name;host=$cg_db_host"; $dsn.=";port=$cg_db_port" if ($cg_db_port ne ''); } my $dbh = DBI->connect($dsn,$cg_db_user,$cg_db_pass); $dbh->{AutoCommit}=1; if ($dbh->err) { print "Error connecting to database: ".$dbh->errstr; exit; } &import_xls_to_database($file,$cg_db_sheet,$cg_db_table,$dbh); $dbh->disconnect; sub import_xls_to_database { my $file=$_[0]; my $import_sheet=$_[1]; my $import_table=$_[2]; my $dbh = $_[3]; my $query_string="CREATE TABLE `$import_table` ("; my $insert_string="INSERT INTO `$import_table` VALUES ("; my $columns=0; my (@sheets); @sheets=&parse_excel($file); my %name_hash; foreach my $column_name (@{${$sheets[$import_sheet]}[0]}) { $column_name=~s/[^A-Za-z0-9_ ]/_/g; if (defined($name_hash{$column_name})) { $name_hash{$column_name}+=1; $query_string.="`$column_name$name_hash{$column_name}` text, "; $insert_string.="?,"; } else { $name_hash{$column_name}=0; $query_string.="`$column_name` text, "; $insert_string.="?,"; } $columns+=1; } $query_string=substr($query_string,0,-2); $insert_string=substr($insert_string,0,-1); $query_string.=")"; $insert_string.=")"; $dbh->do("DROP TABLE `$import_table`"); if ($dbh->err) { print "\nBut that's probobly OK. Don't worry. I'm just too lazy to fix this.\n"; } my $create_table = $dbh->prepare($query_string); $create_table->execute(); $create_table->finish; $dbh->do("LOCK TABLES `$import_table` WRITE"); my $insert_query = $dbh->prepare($insert_string); foreach my $row (@{$sheets[$import_sheet]}[1..$#{$sheets[$import_sheet]}]) { @{$row}[$columns-1].=""; foreach my $cell (@{$row}) { $cell.=""; } $insert_query->execute(@{$row}); } $insert_query->finish; $dbh->do("UNLOCK TABLES"); } sub parse_excel { my $dir=$_[0]; my $buf; my $excel_file; open FILE,$dir; binmode FILE; my $file_size=( -s $dir ); # $$ my $find_string="W\x00o\x00r\x00k\x00b\x00o\x00o\x00k\x00"; for (1..(64-length($find_string))) { $find_string.="\x00"; } $find_string .= "\x12\x00\x02\x01\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"; for (1..36) { $find_string .= "\x00"; } my $length; my $offset; while (!defined($offset) || $offset==-1) { my $excel_block; if (read(FILE,$excel_block,512)) { if ($excel_block=~m/\Q$find_string\E(.{4})(.{4})/s) { $offset=512*(unpack("V",$1)+1); $length=unpack("V",$2); } } else { $offset=-1; die("Can't find spreadsheet!"); } } my @sheets=parse(*FILE{IO},$offset,$length); close FILE; return @sheets; } sub _RK_to_num { my ($RK) = @_; my $type = $RK & 0x3; my $val = ($type & 2) ? int($RK/4) : unpack("d", "\0\0\0\0".pack ("V", $RK ^ $type)) ; $val /= 100.0 if $type &1; # / $val; } sub _float_to_date { # F my ($date) = @_; # $date; my @monsum = ( 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, -1, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335 ); my ($day, $month, $year, $switch, $i); $year = int( $date/365.2425 ) + 1900; $switch = !($year%4) && 12 || 0; $date -= int($year-1900)*365 + int(($year-1900)/4); #/ for($i=11; $i && ($date <= $monsum[$switch+$i]); $i--) {} $month = $i+1; $day = $date - $monsum[$switch+$i]; $date = sprintf("%02d.%02d.%02d", $day, $month, $year); } sub parse { my $sheet=0; my $file=$_[0]; my $offset=$_[1]; my $fsize=$_[2]; my ($l, $o, $type, $buf, @sheets, @strings); my ($row, $col, $style, $len, $num); $o = 0; while ($o<$fsize) { seek($file,$offset+$o,0); read($file,$type,2); read($file,$l,2); $l=unpack ("v",$l); read($file,$buf,$l); $o+=4; if ("\x00\x00" eq $type) { } elsif ("\x07\x02" eq $type) { my ($row, $col, $style, $data, $type) = get_struct("WWWBB", \$buf, 0); if ($type==0) { ${${$sheets[$sheet]}[$row]}[$col]=$data; }else{ if ($data == 0) { ${${$sheets[$sheet]}[$row]}[$col]="#NULL!"; }elsif ($data == 7) { ${${$sheets[$sheet]}[$row]}[$col]="#DIV/0"; }elsif ($data == 0x0f) { ${${$sheets[$sheet]}[$row]}[$col]="#VALUE!"; }elsif ($data == 0x17) { ${${$sheets[$sheet]}[$row]}[$col]="#REF!"; }elsif ($data == 0x1d) { ${${$sheets[$sheet]}[$row]}[$col]="#NAME?"; }elsif ($data == 0x24) { ${${$sheets[$sheet]}[$row]}[$col]="#NUM!"; }elsif ($data == 0x2a) { ${${$sheets[$sheet]}[$row]}[$col]="#N/A"; } } } elsif ("\x07\x00" eq $type) { my ($row, $col, $style, $RK) = get_struct("WWWL", \$buf, 0); ${${$sheets[$sheet]}[$row]}[$col]=_RK_to_num($RK); } elsif ("\xbd\x00" eq $type) { ($row, $col) = get_nword(2, \$buf, 0); my $n = ($l - 6) / 6; #/ for (1..$n) { my ($style, $RK) = get_struct("WL", \$buf, 4+($_-1)*6); ${${$sheets[$sheet]}[$row]}[$col+$_-1]=_RK_to_num($RK); } } elsif ("\xbe\x00" eq $type) { ($row, $col) = get_nword(2, \$buf, 0); my $n = ($l - 6) / 2; #/ my @style = get_nword($n, \$buf, 4); for (1..$n) { ${${$sheets[$sheet]}[$row]}[$col+$_-1]=""; } } elsif ("\xfc\x00" eq $type) { @strings=(); my $os=0; my $n = get_long(\$buf, $os); $os+=4; my $n2 = get_long(\$buf,$os); $os+=4; my $sect_len=$l; my $c; my $l1; my $s; $c=8; for (0..$n2-1) { my $start = ""; $l1 = get_word(\$buf, $os); $c+=3; if ($c+$l1 > $sect_len) { $start=substr($buf,$os+3,($sect_len-$c)); $os+=($sect_len-$c)+5; $l1 -= ($sect_len-$c); $o+=$l; seek($file,$offset+$o,0); read($file,$type,2); read($file,$l,2); $l=unpack ("v",$l); read($file,$buf,$l); $o+=4; $sect_len=$l; $c=$l1+1; $os=1; push(@strings, $start.substr($buf,$os,$l1)); $os+=$l1; } elsif ($c+$l1 == $sect_len) { push(@strings, substr($buf, $os+3, $l1)); $os+=(3+$l1); $c+=$l1; $os+=2; $o+=$l; seek($file,$offset+$o,0); read($file,$type,2); read($file,$l,2); $l=unpack ("v",$l); read($file,$buf,$l); $o+=4; $sect_len=$l; $os=0; $c=0; } else { push(@strings, substr($buf, $os+3, $l1)); $os+=(3+$l1); $c+=$l1; } } } elsif ("\xfd\x00" eq $type) { my ($row, $col, $style) = get_nword(3, \$buf, 0); my $i = get_long(\$buf, 6); ${${$sheets[$sheet]}[$row]}[$col]=$strings[$i]; } elsif ("\x01\x02" eq $type) { my ($row, $col, $style) = get_nword(3, \$buf, 0); ${${$sheets[$sheet]}[$row]}[$col]=""; } elsif ("\x03\x02" eq $type) { my ($row, $col, $style, $float) = get_struct("WWWD", \$buf, 0); ${${$sheets[$sheet]}[$row]}[$col]=$float; } elsif ("\x04\x02" eq $type) { # Cell: Text ($row, $col, $style, $len) = get_nword(4, \$buf, 0); ${${$sheets[$sheet]}[$row]}[$col]=substr($buf, 8, $len); } elsif ("\x7e\x02" eq $type) { # Cell: RK number my ($row, $col, $style, $RK) = get_struct("WWWL", \$buf, 0); ${${$sheets[$sheet]}[$row]}[$col]=_RK_to_num($RK); } elsif ("\x09\x08" eq $type) { $sheet+=1; } $o += $l; } return @sheets; } sub get_struct { my @PV = packpar(shift); if (ref($_[1])) { ${$_[1]} += $PV[1]; unpack ($PV[0], substr(${$_[0]}, ${$_[1]}-$PV[1], $PV[1])); } else { unpack ($PV[0], substr(${$_[0]}, $_[1], $PV[1])); } } sub get_nword { if (ref($_[2])) { ${$_[2]}+=$_[0]*2; unpack ("v"."$_[0]", substr(${$_[1]}, ${$_[2]}-$_[0]*2, $_[0]*2)) } else { unpack ("v"."$_[0]", substr(${$_[1]}, $_[2], $_[0]*2)) } } sub packpar { my $str = shift; my $F; my $len = 0; $F = "C"; $len += ($str =~ s/B/$F/g) * 1; $F = "v"; $len += ($str =~ s/W/$F/g) * 2; $F = "V"; $len += ($str =~ s/L/$F/g) * 4; $F = "f"; $len += ($str =~ s/R/$F/g) * 4; $F = "d"; $len += ($str =~ s/D/$F/g) * 8; ($str, $len); } sub get_nlong { if (ref($_[2])) { ${$_[2]}+=$_[0]*4; unpack ("V"."$_[0]", substr(${$_[1]}, ${$_[2]}-$_[0]*4, $_[0]*4)) } else { unpack ("V"."$_[0]", substr(${$_[1]}, $_[2], $_[0]*4)) } } sub get_byte { get_nbyte(1, @_) } sub get_word { get_nword(1, @_) } sub get_long { get_nlong(1, @_) } sub get_real { get_nreal(1, @_) } sub get_double { get_ndouble(1, @_) } sub get_nbyte { if (ref($_[2])) { ${$_[2]}+=$_[0]*1; unpack ("C"."$_[0]", substr(${$_[1]}, ${$_[2]}-$_[0]*1, $_[0]*1)) } else { unpack ("C"."$_[0]", substr(${$_[1]}, $_[2], $_[0]*1)) } } sub get_ndouble { if (ref($_[2])) { ${$_[2]}+=$_[0]*8; unpack ("d"."$_[0]", substr(${$_[1]}, ${$_[2]}-$_[0]*8, $_[0]*8)) } else { unpack ("d"."$_[0]", substr(${$_[1]}, $_[2], $_[0]*8)) } } sub get_nreal { if (ref($_[2])) { ${$_[2]}+=$_[0]*4; unpack ("f"."$_[0]", substr(${$_[1]}, ${$_[2]}-$_[0]*4, $_[0]*4)) } else { unpack ("f"."$_[0]", substr(${$_[1]}, $_[2], $_[0]*4)) } } sub print_usage { print <<END_OF_USAGE; Imports one sheet from a Microsoft Excel file into a MySQL database. Requires: perl (currently installed or you're looking at the code) DBI DBD::MySQL Access to a MySQL server running version 3.23.* or higher Usage: $0 -t <table> -d <database> [-h <host>] [-P <port>] [-u <user>] [-p <password>] [-s <sheet>] <file> <host> = Host of MySQL server <port> = Port of MySQL server if not default <database> = Database name to connect to <user> = User name to use <password> = Password to use. <sheet> = Number of the sheet in the spreadsheet to import (zero-based) <table> = Name of new table to import to. THIS TABLE IS DROPPED <file> = Name of Excel file to import. Comments: -Some of the Excel parsing code was taken from herbert. -Some of the routines may have come from OLE::Storage (I don't remember) -All fields are created as TEXT fields you can always ALTER TABLE later on. -This program should be very easily modified to support any database with a DBD driver. Author: This routine was written by Stephen Hurd while converting a large MS-Access database to perl/Tk/MySQL in a desperate effort to avoid having to re-boot into MS-Windows. The MS-Access database was contantly importing/exporting Excel files. You can e-mail me at shurd\@sk.sympatico.ca Copyright: This program is Copyright 2001 by Stephen Hurd under the BSD license. If the BSD license is not applicable due to an oversight on my part, I hereby release this code into the public domain. Bugs: There's probobly gobs of them. If you find one, e-mail me at shurd\@sk.sympatico.ca tell me the MySQL version, and attach a copy of the spreadsheet. I'll have a look as soon as I can. As always, a patch is MUCH more welcome than a bug report. :-) END_OF_USAGE exit; }