mtp2R.pl

Douglas Bates bates@stat.wisc.edu
Fri, 16 Jan 1998 09:36:41 -0600 (CST)


I received some large data sets in Minitab Portable Worksheet (.mtp)
format and wanted to read them into R.  Here is a _first cut_ at a
perl script that transforms .mtp files into something that can be
read by R. Comments, bug-fixes, enhancements and extensions are welcome.

#!/usr/bin/perl

# Convert a Minitab Portable Worksheet to an R (or S-PLUS) list structure

# $Id: mtp2R.pl,v 1.1 1998/01/16 15:33:27 bates Exp $
# Copyright (C) 1998 Douglas M Bates
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	See the GNU
# General Public License for more details.
#
# A copy of the GNU General Public License is available via WWW at
# http://www.gnu.org/copyleft/gpl.html.	 You can also obtain it by
# writing to the Free Software Foundation, Inc., 675 Mass Ave,
# Cambridge, MA 02139, USA.

# Send any bug reports to Douglas Bates <bates@stat.wisc.edu>

require 5.003;
use strict;
use Carp;

# Three-argument open and lexical filehandles (used below) need perl 5.6.
require 5.006;

# _r_string( $text )
#   Return $text as a double-quoted R string literal.  Backslashes and
#   double quotes are escaped so that a value containing them cannot end
#   the literal early or be read by R as an escape sequence.
sub _r_string {
    my ($text) = @_;
    $text =~ s/(["\\])/\\$1/g;
    return "\"$text\"";
}

# _r_file_name( $mtp_file )
#   Return the name of the R file written for $mtp_file: a trailing
#   ".mtp" (any case) is replaced by ".R".  When there is no such suffix
#   ".R" is appended instead, so the output can never be the input file.
sub _r_file_name {
    my ($mtp_file) = @_;
    my $r_file = $mtp_file;
    $r_file .= ".R" unless $r_file =~ s/\.mtp$/.R/i;
    return $r_file;
}

# convert_mtp( $mtp_file )
#   Read the Minitab Portable Worksheet $mtp_file and write an R source
#   file (see _r_file_name) that assigns a list with one vector per
#   worksheet record.  The list is named after the output file without
#   its ".R" suffix.
#
#   Dies with a message ending in a newline when a file cannot be opened,
#   when the input is not in the expected format, when the input ends in
#   the middle of a record, or when the output cannot be written.
sub convert_mtp {
    my ($mtp_file) = @_;

    open( my $in, '<', $mtp_file )
        or die "Unable to open $mtp_file for reading: $!\n";

    # Validate the input before creating (and truncating) any output file.
    my $line = <$in>;
    die "$mtp_file: Input does not seem to be in Minitab Portable Format\n"
        unless defined $line && $line =~ /^Minitab Portable Worksheet/;

    my $r_file = _r_file_name($mtp_file);
    open( my $out, '>', $r_file )
        or die "Unable to open $r_file for writing: $!\n";

    ( my $list_name = $r_file ) =~ s/\.R$//;
    print {$out} "# R data file automatically created by mtp2R\n";
    print {$out} "\"$list_name\" <- list(\n";

    my $initial = 1;    # true until the first list element has been written
    while ( defined( $line = <$in> ) ) {
        next unless $line =~ /\S/;    # tolerate blank lines between records

        # Record header: "%" followed by four integers and a name.
        die "$mtp_file: Incorrect format\n" unless $line =~ /^%/;
        my ( $pct, $colmat, $number, $length, $NAs, $name ) =
            split( ' ', $line );
        for my $field ( $colmat, $number, $length, $NAs ) {
            die "$mtp_file: Incorrect format\n"
                unless defined $field && $field =~ /^-?\d+$/;
        }

        # Records of type 100..102 are not converted.
        # NOTE(review): this assumes such records have no data lines
        # following their header -- confirm against the format definition.
        next if ( 100 <= $colmat && $colmat <= 102 );

        print {$out} ",\n" unless $initial;
        $initial = 0;

        # A name of "." (or no name at all) marks an unnamed record.
        $name = "C" . $number if ( !defined $name || $name eq '.' );
        print {$out} "$name = c(\n";

        # Values are cut from fixed-width fields: 16 characters by
        # default; a negative fifth header field selects text values in
        # fields of (-field + 1) characters.
        my $width = $NAs < 0 ? -$NAs + 1 : 16;

        while ( $length > 0 ) {
            $line = <$in>;
            die "$mtp_file: Unexpected end of file in data for $name\n"
                unless defined $line;
            chomp $line;
            chop $line if ( $line =~ /\r$/ );    # Generated on DOS/Windows
            chop $line if ( $line =~ /\.$/ );    # drop a trailing period
            while ( length($line) >= $width && $length ) {
                my $value = substr( $line, 0, $width );
                $line = substr( $line, $width );
                $value =~ s/^\s+//;
                $value =~ s/\s+$//;
                my $r_value = $NAs < 0 ? _r_string($value) : 0 + $value;
                print {$out} $r_value;
                $length--;
                print {$out} ", " if $length;
            }
            print {$out} "\n";
        }
        print {$out} ")";
    }
    print {$out} ")\n";

    close($in);

    # Buffered write errors (e.g. a full disk) only surface at close.
    close($out) or die "Unable to write $r_file: $!\n";
    return;
}

# Convert every file named on the command line.  A file that fails is
# reported on STDERR and skipped so that the remaining files are still
# converted; the exit status is non-zero if any conversion failed.
my $failures = 0;
for my $mtp_file (@ARGV) {
    next if eval { convert_mtp($mtp_file); 1 };
    warn $@;
    $failures++;
}
exit 1 if $failures;
-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-
r-devel mailing list -- Read http://www.ci.tuwien.ac.at/~hornik/R/R-FAQ.html
Send "info", "help", or "[un]subscribe"
(in the "body", not the subject !)  To: r-devel-request@stat.math.ethz.ch
_._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._