#!/usr/local/bin/perl -w # $Id: bin2num.perl,v 1.1.1.1 2003/06/04 16:49:55 humberto Exp $ # Convert binary coded alleles to numbered alleles. # Copyright (C) 1995, 1996 Humberto Ortiz Zuazaga and Rosemarie Plaetke. # See the file licence.html for licence information. require 5.002; use strict "vars"; #no strict "refs"; use English; use Getopt::Long; # Short help screen. sub usage { my $progname = `basename $0`; chop $progname; print <) { $line .= $_; next if /^\s*#/; next if /^\s*$/; return $line; } } ### Parse command line arguments. my $result = GetOptions("help","pedin=s","datain=s","pedout=s","dataout=s"); my $bogus = $::opt_help; # Silence compiler warnings. if (!$result || $::opt_help || $#ARGV != -1) { &usage; } # Set to defaults unless user supplied the info. my $pedin = $::opt_pedin; $pedin = "pedin.dat" unless $::opt_pedin; my $datain = $::opt_datain; $datain = "datain.dat" unless $::opt_datain; my $pedout = $::opt_pedout; $pedout = "pedout.dat" unless $::opt_pedout; my $dataout = $::opt_dataout; $dataout = "dataout.dat" unless $::opt_dataout; ### Process the loci file. my ($line, $numloci, $a_type, $a_count, $a_name); my ($marker, %loci); open (DATAIN, $datain) || die "Cannot open $datain for reading"; open (DATAOUT, ">" . $dataout) || die "Cannot open $dataout for writing"; # Get the number of loci. $line = nextline(DATAIN); $line =~ /^\s*(\d+)/m; $numloci = $1 || die "cannot determine number of loci in $datain"; print DATAOUT $line; #print "$numloci\n"; # Skip rest of header. print DATAOUT nextline(DATAIN); print DATAOUT nextline(DATAIN); # Now loop over the markers. foreach $marker ( 1 .. $numloci ) { $line = nextline(DATAIN); $line =~ /^\s*(\d)\s+(\d+)\s+#\s+(\S+)\s*$/m || die "cannot parse allele info"; # go read perlre(1) $a_type = $1; $a_type == 2 || $a_type == 3 || die "cannot process type $a_type alleles"; $a_count = $2 || die "cannot determine number of alleles"; $a_name = $3 || die "cannot determine marker name"; if ($a_type == 2) { print "binary marker: $marker\n"; $line =~ s/^(\s*)2\b/${1}3/m; # rewrite marker type as '3', numbered allele } print DATAOUT $line; # print "$a_type $a_count $a_name\n"; # skip frequencies; print DATAOUT nextline(DATAIN); if ($a_type == 2) { my ($binfac, $allele, $pats); $pats = {}; $line = nextline(DATAIN); $line =~ /^\s*(\d+)\s*#?.*$/m || die "cannot detemine number of binary factors"; $binfac = $1; # print "$binfac\n"; foreach $allele (1 .. $a_count) { my ($pat); $line = nextline(DATAIN); $line =~ m/^\s*(([01]\s+){$binfac})#?.*$/m || die "cannot find binary encoding for marker $a_name, allele $allele"; $pat = $1; $pat =~ s/0/[01]/g; $pat =~ s/\s/\\s/g; $pat =~ s/\\s$//; # print "\"$pat\"\n"; $$pats{$pat} = $allele; } $loci{$marker} = { A_COUNT => $a_count, BINFAC => $binfac, PATTERNS => $pats, } } } # Now skip the rest of the file, and close the handles. print DATAOUT while ; close (DATAOUT); close (DATAIN); ### Process the pedigree file #my ($marker, $allele); # #foreach $marker ( sort {$::a <=> $::b} (keys %loci)) { # print "Marker $marker:\n"; # foreach $allele ( keys %{$loci{$marker}{PATTERNS}} ) { # print "Allele ", $loci{$marker}{PATTERNS}{$allele}; # print " pattern ", $allele, "\n"; # } #} open (PEDIN, $pedin) || die "Cannot open $pedin for reading"; open (PEDOUT, ">" . $pedout) || die "Cannot open $pedout for writing"; while ($line = nextline(PEDIN)) { # find the start of the alleles $line =~ /^\s*(\d+\s+){9}/m || die "cannot parse pedigree info"; print PEDOUT $&; # Copy the pedigree info $line = $'; # Trim the allele info from the line foreach $marker ( 1 .. $numloci ) { my ($match, $pat, @alleles, $allele); if (defined $loci{$marker}) { $pat = "^([01]\\s+){$loci{$marker}{BINFAC}}"; } else { $pat = "^(\\d+\\s+){2}"; } $line =~ /$pat/ || die "cannot parse allele info for marker $marker"; chop($match = $&); $line = $'; if (defined $loci{$marker}) { foreach $allele ( keys %{$loci{$marker}{PATTERNS}} ) { if ($match =~ m/$allele/) { # print "found allele pattern $allele\n"; push @alleles, $loci{$marker}{PATTERNS}{$allele}; } } if (0 == @alleles) { print PEDOUT "0 0 "; } elsif (1 == @alleles) { print PEDOUT "$alleles[0] $alleles[0] "; } else { print PEDOUT "$alleles[0] $alleles[1] "; } } else { print PEDOUT $match; } } # print the rest of the line; print PEDOUT $line ? $line : "\n"; } # Now skip the rest of the file, and close the handles. # Actually, trailing comments would probably confuse this program. print PEDOUT while ; close (PEDOUT); close (PEDIN);