#!/usr/local/bin/perl # $Id: origins.perl,v 1.1.1.1 2003/06/04 16:49:55 humberto Exp $ # Infer the grandparental origins of a set of markers. # Copyright (C) 1995, 1996 Humberto Ortiz Zuazaga and Rosemarie Plaetke. # See the file licence.html for licence information. require 5.002; use strict "vars"; use English; use Getopt::Long; use Boulder::Stream; my $progname = `basename $0`; chop $progname; my $result = GetOptions("help","pedin=s","datain=s","anain=s","labels=s", "report=s"); my $bogus = $::opt_help; # Silence compiler warnings. if (!$result || $::opt_help || $#ARGV != -1) { &usage; } my ($pedin, $datain, $anain) = ($::opt_pedin, $::opt_datain, $::opt_anain); # Set to defaults unless user supplied the info. $pedin = "pedin.dat" unless $::opt_pedin; open (PEDIN, $pedin) || die "$progname: cannot open $pedin: $!"; $datain = "datain.dat" unless $::opt_datain; open (DATAIN, $datain) || die "$progname: cannot open $datain: $!"; my $report; if ($::opt_report) { $report = $::opt_report; } else { $report = '-'; } open (REPORT, ">" . $report) || die "$progname: cannot open $report: $!"; my $labels; my $lstream; if ($::opt_labels) { $labels = $::opt_labels; open (LABELS, ">" . $labels) || die "$progname: cannot open $labels: $!"; $lstream = new Boulder::Stream(STDIN, LABELS); } my $anain; if ($::opt_anain) { $anain = $::opt_anain; open (ANAIN, $anain) || die "$progname: cannot open $anain: $!"; } my (@markers, %pedigrees, @list); @markers = &parse_datain(\*DATAIN); close (DATAIN); if ($::opt_anain) { @list = &parse_anain(\*ANAIN); } else { @list = ( 1 .. $#markers ); } close (ANAIN); &label_header($lstream, \@markers, \@list) if $::opt_labels; # Loop while more families while (!eof PEDIN) { %pedigrees = &parse_pedin(\*PEDIN, $#markers); &infer(\@markers, \%pedigrees); # Print families in pedigree order. &print_CEPH_pedigrees(\*REPORT, \@markers, \%pedigrees, \@list); &labelings($lstream, \@markers, \%pedigrees, \@list) if $::opt_labels; } # end loop ### End of main sub print_CEPH_pedigrees { my ($file, $markers, $pedigrees, $list) = @::ARG; my ($key, $i, $j); my ($pgf, $pgm, $mgf, $mgm); my ($dad, $mom); foreach $key ( sort {$::a <=> $::b} (keys %{$pedigrees}) ) { $pgf = $$pedigrees{$key}[$$pedigrees{$key}[1]{Father}]{Alleles}; $pgm = $$pedigrees{$key}[$$pedigrees{$key}[1]{Mother}]{Alleles}; $mgf = $$pedigrees{$key}[$$pedigrees{$key}[2]{Father}]{Alleles}; $mgm = $$pedigrees{$key}[$$pedigrees{$key}[2]{Mother}]{Alleles}; $dad = $$pedigrees{$key}[1]{Alleles}; $mom = $$pedigrees{$key}[2]{Alleles}; print $file < 30; print $file < $marker_count; ($kind, $allele_count, $name) = /^\s*(\d)\s+(\d+)\s+#\s+(.*)$/m; if (!$kind || !$allele_count || !$name) { die "error parsing allele info"; } $kind == 3 || die "cannot parse type $kind alleles"; $rec = { Name => $name, Kind => $kind, Count => $allele_count, }; $markers[$i] = $rec; $i++; nextline($file); # Skip the allele frequencies. } if (--$i != $marker_count) { die "$progname: wrong number of loci in DATAIN file"; } return @markers; } ### pedin.dat ### sub parse_pedin { my ($file, $size) = @::ARG; my ($pedigree, $id, $father, $mother, $offspring, $patsib, $matsib, $sex, $garbage, @scratch); my (%pedigrees, $i); my ($line); my $last = 0; while ($line = nextline($file)) { ($pedigree, $id, $father, $mother, $offspring, $patsib, $matsib, $sex, $garbage, @scratch) = split ' ', $line; # split into whitespace separated columns, like awk. @scratch || die "error parsing pedigree record"; if (($last != 0) && ($last != $pedigree)) { seek $file,-(length $line),1; last; } $last = $pedigree; $pedigrees{$pedigree}[$id] = { Id => $id, Sex => $sex, Father => $father, Mother => $mother, Children => $offspring, Patsib => $patsib, Matsib => $matsib, }; for $i ( 1 .. $size ) { $pedigrees{$pedigree}[$id]{Alleles}[$i][0] = { Id => shift @scratch, PO => ' ', # " " is unknown, P & M: paternal & maternal. GPO => ' ', }; $pedigrees{$pedigree}[$id]{Alleles}[$i][1] = { Id => shift @scratch, PO => ' ', # " " is unknown, P & M: paternal & maternal. GPO => ' ', }; } } # Delete families with no grandparents. foreach $pedigree (keys %pedigrees) { if ((0 == $pedigrees{$pedigree}[1]{Father}) || (0 == $pedigrees{$pedigree}[1]{Mother}) || (0 == $pedigrees{$pedigree}[2]{Father}) || (0 == $pedigrees{$pedigree}[2]{Mother})) { print STDERR "deleting pedigree $pedigree, missing grandparents\n"; delete $pedigrees{$pedigree}; } } return %pedigrees; } sub parse_anain { my ($file) = @::ARG; my (@markers, @cols, $word); my ($line); while ($line = nextline($file)) { @cols = split ' ',$line; # Split into whitespace separated columns. foreach $word ( @cols ) { push (@markers, $word) if $word =~ /\d+/; } } return @markers; } sub person_has { my ($which, $marker, $person, $family, $pedigrees) = @_; $which == $$pedigrees{$family}[$person]{Alleles}[$marker][0]{Id} || $which == $$pedigrees{$family}[$person]{Alleles}[$marker][1]{Id}; } sub person_doesnt_have { my ($which, $marker, $person, $family, $pedigrees) = @_; $which != $$pedigrees{$family}[$person]{Alleles}[$marker][0]{Id} && $which != $$pedigrees{$family}[$person]{Alleles}[$marker][1]{Id} && 0 != $$pedigrees{$family}[$person]{Alleles}[$marker][0]{Id} && 0 != $$pedigrees{$family}[$person]{Alleles}[$marker][1]{Id}; } sub paternal { my ($which, $other, $marker, $person, $family, $pedigrees) = @_; my ($dad, $mom); $dad = $$pedigrees{$family}[$person]{Father}; $mom = $$pedigrees{$family}[$person]{Mother}; # If the data is missing, unknown if (0 == $which || 0 == $dad || 0 == $mom) { return 0; } # If person is homozygous, assign at random. if ($$pedigrees{$family}[$person]{Alleles}[$marker][0]{Id} == $$pedigrees{$family}[$person]{Alleles}[$marker][1]{Id}) { return 1; } # Otherwise, check alleles in parents. return (person_has($which, $marker, $dad, $family, $pedigrees) && person_doesnt_have($which, $marker, $mom, $family, $pedigrees)) || (person_has(0, $marker, $dad, $family, $pedigrees) && person_has($other, $marker, $mom, $family, $pedigrees) && person_doesnt_have($which, $marker, $mom, $family, $pedigrees)); } sub maternal { my ($which, $other, $marker, $person, $family, $pedigrees) = @_; my ($dad, $mom); $dad = $$pedigrees{$family}[$person]{Father}; $mom = $$pedigrees{$family}[$person]{Mother}; # If the data is missing, unknown if (0 == $which || 0 == $dad || 0 == $mom) { return 0; } # If person is homozygous, assign at random. if ($$pedigrees{$family}[$person]{Alleles}[$marker][0]{Id} == $$pedigrees{$family}[$person]{Alleles}[$marker][1]{Id}) { return 1; } # Otherwise, check alleles in parents. return (person_has($which, $marker, $mom, $family, $pedigrees) && person_doesnt_have($which, $marker, $dad, $family, $pedigrees)) || (person_has(0, $marker, $mom, $family, $pedigrees) && person_has($other, $marker, $dad, $family, $pedigrees) && person_doesnt_have($which, $marker, $dad, $family, $pedigrees)); } sub infer { my ($markers, $pedigrees) = @::ARG; my ($family, $person, $marker, $allele, $copy, $other); foreach $family ( sort {$::a <=> $::b} (keys %$pedigrees) ) { # Find paternal origins. foreach $person ( 1 .. $#{$pedigrees{$family}} ) { foreach $marker ( 1 .. $#{$pedigrees{$family}[$person]{Alleles}} ) { foreach $copy ( 0 .. 1 ) { $allele = $$pedigrees{$family}[$person]{Alleles}[$marker][$copy]{Id}; $other = $$pedigrees{$family}[$person]{Alleles}[$marker][($copy + 1) % 2]{Id}; if (&paternal($allele, $other, $marker, $person, $family, $pedigrees)) { #print "Paternal marker: $family $person $marker $copy\n"; $$pedigrees{$family}[$person]{Alleles}[$marker][$copy]{PO} = 'P'; $$pedigrees{$family}[$person]{Alleles}[$marker][($copy + 1) % 2]{PO} = 'M'; } elsif (&maternal($allele, $other, $marker, $person, $family, $pedigrees)) { #print "Maternal marker: $family $person $marker $copy\n"; $$pedigrees{$family}[$person]{Alleles}[$marker][$copy]{PO} = 'M'; $$pedigrees{$family}[$person]{Alleles}[$marker][($copy + 1) % 2]{PO} = 'P'; } } # Now reverse sort the copies. $$pedigrees{$family}[$person]{Alleles}[$marker] = [ sort {$$::b{PO} cmp $$::a{PO};} @{$$pedigrees{$family}[$person]{Alleles}[$marker]} ]; } } # Find grandpaternal origins. foreach $person ( 1 .. $#{$pedigrees{$family}} ) { foreach $marker ( 1 .. $#{$pedigrees{$family}[$person]{Alleles}} ) { foreach $copy ( 0 .. 1 ) { $$pedigrees{$family}[$person]{Alleles}[$marker][$copy]{"GPO"} = &GPO($copy, $marker, $person, $family, $pedigrees); } } } } } sub GPO { my ($copy, $marker, $person, $family, $pedigrees) = @_; my ($parent, $allele); if ($$pedigrees{$family}[$person]{Alleles}[$marker][$copy]{PO} eq 'P') { $parent = $$pedigrees{$family}[$person]{Father}; } elsif ($$pedigrees{$family}[$person]{Alleles}[$marker][$copy]{PO} eq 'M') { $parent = $$pedigrees{$family}[$person]{Mother}; } else { return ' '; } # If parent is homozygous, unknown. if ($$pedigrees{$family}[$parent]{Alleles}[$marker][0]{Id} == $$pedigrees{$family}[$parent]{Alleles}[$marker][1]{Id}) { return ' '; } # Otherwise, find allele in parent. $allele = $$pedigrees{$family}[$person]{Alleles}[$marker][$copy]{Id}; if ($allele == $$pedigrees{$family}[$parent]{Alleles}[$marker][0]{Id}) { return $$pedigrees{$family}[$parent]{Alleles}[$marker][0]{PO}; } else { return $$pedigrees{$family}[$parent]{Alleles}[$marker][1]{PO}; } } sub label_header { my ($stream, $markers, $list) = @::ARG; my ($family, $person, $copy, $marker); my (%mapid); my $mapnames = new Stone; my $testnames = new Stone; my @map; my @test; foreach $marker ( @$list ) { $mapid{$marker}++; push @map, $$markers[$marker]{Name}; } $mapnames->insert(MAPNAMES => join ' ', @map); $stream->write_record($mapnames); foreach $marker ( 1 .. $#{$markers} ) { if (!$mapid{$marker}) { push @test, $$markers[$marker]{Name}; } } $testnames->insert(TESTNAMES => join ' ', @test); $stream->write_record($testnames); } sub labelings { my ($stream, $markers, $pedigrees, $list) = @::ARG; my ($family, $person, $copy, $marker); my (%mapid); my $chromosome = new Stone; foreach $marker ( @$list ) { $mapid{$marker}++; } foreach $family ( sort {$::a <=> $::b} (keys %{$pedigrees}) ) { # Now a loop for printing the kids. foreach $person ( 1 .. $#{$pedigrees{$family}} ) { # skip if no such person! next if !(defined $$pedigrees{$family}[$person]{Father}); # skip if person is not a child of person 1. next if $$pedigrees{$family}[$person]{Father} != 1; foreach $copy ( 0 .. 1 ) { my (@mapgpo, @testgpo); $chromosome->replace(PEDIGREE => $family); $chromosome->replace(ID => $person); $chromosome->replace(CHROMOSOME => $copy == 0 ? "PATERNAL" : "MATERNAL"); foreach $marker ( @$list ) { if ($mapid{$marker}) { push (@mapgpo, $$pedigrees{$family}[$person]{Alleles}[$marker][$copy]{"GPO"}); } } foreach $marker ( 1 .. $#{$pedigrees{$family}[$person]{Alleles}} ) { if (!$mapid{$marker}) { push (@testgpo, $$pedigrees{$family}[$person]{Alleles}[$marker][$copy]{"GPO"}); } } grep(tr/ PM/UPM/,@mapgpo); grep(tr/ PM/UPM/,@testgpo); $chromosome->replace(MAPLABELS => join ('',@mapgpo)); $chromosome->replace(TESTLABELS => join ('',@testgpo)); $stream->write_record($chromosome); } } } } # Return the next data line. sub nextline { my ($handle) = @::ARG; while (<$handle>) { next if /^\s*#/; next if /^\s*$/; return $_; } } sub usage { print <