#!/usr/local/bin/perl
# $Id: origins.perl,v 1.1.1.1 2003/06/04 16:49:55 humberto Exp $
# Infer the grandparental origins of a set of markers.
# Copyright (C) 1995, 1996 Humberto Ortiz Zuazaga and Rosemarie Plaetke.
# See the file licence.html for licence information.
require 5.002;
use strict "vars";
use English;
use Getopt::Long;
use Boulder::Stream;
my $progname = `basename $0`;
chop $progname;
my $result = GetOptions("help","pedin=s","datain=s","anain=s","labels=s",
"report=s");
my $bogus = $::opt_help; # Silence compiler warnings.
if (!$result || $::opt_help || $#ARGV != -1) {
&usage;
}
my ($pedin, $datain, $anain) = ($::opt_pedin, $::opt_datain, $::opt_anain);
# Set to defaults unless user supplied the info.
$pedin = "pedin.dat" unless $::opt_pedin;
open (PEDIN, $pedin) || die "$progname: cannot open $pedin: $!";
$datain = "datain.dat" unless $::opt_datain;
open (DATAIN, $datain) || die "$progname: cannot open $datain: $!";
my $report;
if ($::opt_report) {
$report = $::opt_report;
} else {
$report = '-';
}
open (REPORT, ">" . $report) || die "$progname: cannot open $report: $!";
my $labels;
my $lstream;
if ($::opt_labels) {
$labels = $::opt_labels;
open (LABELS, ">" . $labels) || die "$progname: cannot open $labels: $!";
$lstream = new Boulder::Stream(STDIN, LABELS);
}
my $anain;
if ($::opt_anain) {
$anain = $::opt_anain;
open (ANAIN, $anain) || die "$progname: cannot open $anain: $!";
}
my (@markers, %pedigrees, @list);
@markers = &parse_datain(\*DATAIN);
close (DATAIN);
if ($::opt_anain) {
@list = &parse_anain(\*ANAIN);
} else {
@list = ( 1 .. $#markers );
}
close (ANAIN);
&label_header($lstream, \@markers, \@list) if $::opt_labels;
# Loop while more families
while (!eof PEDIN) {
%pedigrees = &parse_pedin(\*PEDIN, $#markers);
&infer(\@markers, \%pedigrees);
# Print families in pedigree order.
&print_CEPH_pedigrees(\*REPORT, \@markers, \%pedigrees, \@list);
&labelings($lstream, \@markers, \%pedigrees, \@list) if $::opt_labels;
}
# end loop
### End of main
sub print_CEPH_pedigrees {
my ($file, $markers, $pedigrees, $list) = @::ARG;
my ($key, $i, $j);
my ($pgf, $pgm, $mgf, $mgm);
my ($dad, $mom);
foreach $key ( sort {$::a <=> $::b} (keys %{$pedigrees}) ) {
$pgf = $$pedigrees{$key}[$$pedigrees{$key}[1]{Father}]{Alleles};
$pgm = $$pedigrees{$key}[$$pedigrees{$key}[1]{Mother}]{Alleles};
$mgf = $$pedigrees{$key}[$$pedigrees{$key}[2]{Father}]{Alleles};
$mgm = $$pedigrees{$key}[$$pedigrees{$key}[2]{Mother}]{Alleles};
$dad = $$pedigrees{$key}[1]{Alleles};
$mom = $$pedigrees{$key}[2]{Alleles};
print $file <
30;
print $file < $marker_count;
($kind, $allele_count, $name) = /^\s*(\d)\s+(\d+)\s+#\s+(.*)$/m;
if (!$kind || !$allele_count || !$name) {
die "error parsing allele info";
}
$kind == 3 || die "cannot parse type $kind alleles";
$rec = {
Name => $name,
Kind => $kind,
Count => $allele_count,
};
$markers[$i] = $rec;
$i++;
nextline($file); # Skip the allele frequencies.
}
if (--$i != $marker_count) {
die "$progname: wrong number of loci in DATAIN file";
}
return @markers;
}
### pedin.dat ###
sub parse_pedin {
my ($file, $size) = @::ARG;
my ($pedigree,
$id,
$father,
$mother,
$offspring,
$patsib,
$matsib,
$sex,
$garbage,
@scratch);
my (%pedigrees, $i);
my ($line);
my $last = 0;
while ($line = nextline($file)) {
($pedigree, $id, $father, $mother, $offspring, $patsib, $matsib,
$sex, $garbage, @scratch) = split ' ', $line;
# split into whitespace separated columns, like awk.
@scratch || die "error parsing pedigree record";
if (($last != 0) && ($last != $pedigree)) {
seek $file,-(length $line),1;
last;
}
$last = $pedigree;
$pedigrees{$pedigree}[$id] = {
Id => $id,
Sex => $sex,
Father => $father,
Mother => $mother,
Children => $offspring,
Patsib => $patsib,
Matsib => $matsib,
};
for $i ( 1 .. $size ) {
$pedigrees{$pedigree}[$id]{Alleles}[$i][0] = {
Id => shift @scratch,
PO => ' ', # " " is unknown, P & M: paternal & maternal.
GPO => ' ',
};
$pedigrees{$pedigree}[$id]{Alleles}[$i][1] = {
Id => shift @scratch,
PO => ' ', # " " is unknown, P & M: paternal & maternal.
GPO => ' ',
};
}
}
# Delete families with no grandparents.
foreach $pedigree (keys %pedigrees) {
if ((0 == $pedigrees{$pedigree}[1]{Father})
|| (0 == $pedigrees{$pedigree}[1]{Mother})
|| (0 == $pedigrees{$pedigree}[2]{Father})
|| (0 == $pedigrees{$pedigree}[2]{Mother})) {
print STDERR "deleting pedigree $pedigree, missing grandparents\n";
delete $pedigrees{$pedigree};
}
}
return %pedigrees;
}
sub parse_anain {
my ($file) = @::ARG;
my (@markers, @cols, $word);
my ($line);
while ($line = nextline($file)) {
@cols = split ' ',$line; # Split into whitespace separated columns.
foreach $word ( @cols ) {
push (@markers, $word) if $word =~ /\d+/;
}
}
return @markers;
}
sub person_has {
my ($which, $marker, $person, $family, $pedigrees) = @_;
$which == $$pedigrees{$family}[$person]{Alleles}[$marker][0]{Id} ||
$which == $$pedigrees{$family}[$person]{Alleles}[$marker][1]{Id};
}
sub person_doesnt_have {
my ($which, $marker, $person, $family, $pedigrees) = @_;
$which != $$pedigrees{$family}[$person]{Alleles}[$marker][0]{Id} &&
$which != $$pedigrees{$family}[$person]{Alleles}[$marker][1]{Id} &&
0 != $$pedigrees{$family}[$person]{Alleles}[$marker][0]{Id} &&
0 != $$pedigrees{$family}[$person]{Alleles}[$marker][1]{Id};
}
sub paternal {
my ($which, $other, $marker, $person, $family, $pedigrees) = @_;
my ($dad, $mom);
$dad = $$pedigrees{$family}[$person]{Father};
$mom = $$pedigrees{$family}[$person]{Mother};
# If the data is missing, unknown
if (0 == $which || 0 == $dad || 0 == $mom) { return 0; }
# If person is homozygous, assign at random.
if ($$pedigrees{$family}[$person]{Alleles}[$marker][0]{Id} ==
$$pedigrees{$family}[$person]{Alleles}[$marker][1]{Id}) {
return 1;
}
# Otherwise, check alleles in parents.
return (person_has($which, $marker, $dad, $family, $pedigrees) &&
person_doesnt_have($which, $marker, $mom, $family, $pedigrees)) ||
(person_has(0, $marker, $dad, $family, $pedigrees) &&
person_has($other, $marker, $mom, $family, $pedigrees) &&
person_doesnt_have($which, $marker, $mom, $family, $pedigrees));
}
sub maternal {
my ($which, $other, $marker, $person, $family, $pedigrees) = @_;
my ($dad, $mom);
$dad = $$pedigrees{$family}[$person]{Father};
$mom = $$pedigrees{$family}[$person]{Mother};
# If the data is missing, unknown
if (0 == $which || 0 == $dad || 0 == $mom) { return 0; }
# If person is homozygous, assign at random.
if ($$pedigrees{$family}[$person]{Alleles}[$marker][0]{Id} ==
$$pedigrees{$family}[$person]{Alleles}[$marker][1]{Id}) {
return 1;
}
# Otherwise, check alleles in parents.
return (person_has($which, $marker, $mom, $family, $pedigrees) &&
person_doesnt_have($which, $marker, $dad, $family, $pedigrees)) ||
(person_has(0, $marker, $mom, $family, $pedigrees) &&
person_has($other, $marker, $dad, $family, $pedigrees) &&
person_doesnt_have($which, $marker, $dad, $family, $pedigrees));
}
sub infer {
my ($markers, $pedigrees) = @::ARG;
my ($family, $person, $marker, $allele, $copy, $other);
foreach $family ( sort {$::a <=> $::b} (keys %$pedigrees) ) {
# Find paternal origins.
foreach $person ( 1 .. $#{$pedigrees{$family}} ) {
foreach $marker ( 1 .. $#{$pedigrees{$family}[$person]{Alleles}} ) {
foreach $copy ( 0 .. 1 ) {
$allele = $$pedigrees{$family}[$person]{Alleles}[$marker][$copy]{Id};
$other = $$pedigrees{$family}[$person]{Alleles}[$marker][($copy + 1) % 2]{Id};
if (&paternal($allele, $other, $marker, $person, $family, $pedigrees)) {
#print "Paternal marker: $family $person $marker $copy\n";
$$pedigrees{$family}[$person]{Alleles}[$marker][$copy]{PO} = 'P';
$$pedigrees{$family}[$person]{Alleles}[$marker][($copy + 1) % 2]{PO} = 'M';
}
elsif (&maternal($allele, $other, $marker, $person, $family, $pedigrees)) {
#print "Maternal marker: $family $person $marker $copy\n";
$$pedigrees{$family}[$person]{Alleles}[$marker][$copy]{PO} = 'M';
$$pedigrees{$family}[$person]{Alleles}[$marker][($copy + 1) % 2]{PO} = 'P';
}
}
# Now reverse sort the copies.
$$pedigrees{$family}[$person]{Alleles}[$marker] = [ sort {$$::b{PO} cmp $$::a{PO};} @{$$pedigrees{$family}[$person]{Alleles}[$marker]} ];
}
}
# Find grandpaternal origins.
foreach $person ( 1 .. $#{$pedigrees{$family}} ) {
foreach $marker ( 1 .. $#{$pedigrees{$family}[$person]{Alleles}} ) {
foreach $copy ( 0 .. 1 ) {
$$pedigrees{$family}[$person]{Alleles}[$marker][$copy]{"GPO"} =
&GPO($copy, $marker, $person, $family, $pedigrees);
}
}
}
}
}
sub GPO {
my ($copy, $marker, $person, $family, $pedigrees) = @_;
my ($parent, $allele);
if ($$pedigrees{$family}[$person]{Alleles}[$marker][$copy]{PO} eq 'P') {
$parent = $$pedigrees{$family}[$person]{Father};
} elsif ($$pedigrees{$family}[$person]{Alleles}[$marker][$copy]{PO} eq 'M') {
$parent = $$pedigrees{$family}[$person]{Mother};
} else {
return ' ';
}
# If parent is homozygous, unknown.
if ($$pedigrees{$family}[$parent]{Alleles}[$marker][0]{Id} ==
$$pedigrees{$family}[$parent]{Alleles}[$marker][1]{Id}) {
return ' ';
}
# Otherwise, find allele in parent.
$allele = $$pedigrees{$family}[$person]{Alleles}[$marker][$copy]{Id};
if ($allele == $$pedigrees{$family}[$parent]{Alleles}[$marker][0]{Id}) {
return $$pedigrees{$family}[$parent]{Alleles}[$marker][0]{PO};
} else {
return $$pedigrees{$family}[$parent]{Alleles}[$marker][1]{PO};
}
}
sub label_header {
my ($stream, $markers, $list) = @::ARG;
my ($family, $person, $copy, $marker);
my (%mapid);
my $mapnames = new Stone;
my $testnames = new Stone;
my @map;
my @test;
foreach $marker ( @$list ) {
$mapid{$marker}++;
push @map, $$markers[$marker]{Name};
}
$mapnames->insert(MAPNAMES => join ' ', @map);
$stream->write_record($mapnames);
foreach $marker ( 1 .. $#{$markers} ) {
if (!$mapid{$marker}) {
push @test, $$markers[$marker]{Name};
}
}
$testnames->insert(TESTNAMES => join ' ', @test);
$stream->write_record($testnames);
}
sub labelings {
my ($stream, $markers, $pedigrees, $list) = @::ARG;
my ($family, $person, $copy, $marker);
my (%mapid);
my $chromosome = new Stone;
foreach $marker ( @$list ) {
$mapid{$marker}++;
}
foreach $family ( sort {$::a <=> $::b} (keys %{$pedigrees}) ) {
# Now a loop for printing the kids.
foreach $person ( 1 .. $#{$pedigrees{$family}} ) {
# skip if no such person!
next if !(defined $$pedigrees{$family}[$person]{Father});
# skip if person is not a child of person 1.
next if $$pedigrees{$family}[$person]{Father} != 1;
foreach $copy ( 0 .. 1 ) {
my (@mapgpo, @testgpo);
$chromosome->replace(PEDIGREE => $family);
$chromosome->replace(ID => $person);
$chromosome->replace(CHROMOSOME =>
$copy == 0 ? "PATERNAL" : "MATERNAL");
foreach $marker ( @$list ) {
if ($mapid{$marker}) {
push (@mapgpo,
$$pedigrees{$family}[$person]{Alleles}[$marker][$copy]{"GPO"});
}
}
foreach $marker ( 1 .. $#{$pedigrees{$family}[$person]{Alleles}} ) {
if (!$mapid{$marker}) {
push (@testgpo,
$$pedigrees{$family}[$person]{Alleles}[$marker][$copy]{"GPO"});
}
}
grep(tr/ PM/UPM/,@mapgpo);
grep(tr/ PM/UPM/,@testgpo);
$chromosome->replace(MAPLABELS => join ('',@mapgpo));
$chromosome->replace(TESTLABELS => join ('',@testgpo));
$stream->write_record($chromosome);
}
}
}
}
# Return the next data line.
sub nextline {
my ($handle) = @::ARG;
while (<$handle>) {
next if /^\s*#/;
next if /^\s*$/;
return $_;
}
}
sub usage {
print <