package Splicy::Probeset; # License and INIT {{{ # # ---------------------------------------------------------------------------------- # Probeset.pm # A Module to handle Probeset informations, store and generate original data # 2005 Davide Rambaldi, IFOM. # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA # # You may modify this module as you wish, but if you redistribute a modified version, # please attach a note listing the modifications you have made. # # You can contact me by e-mail: davide.rambaldi@ifom-ieo-campus.it # # -------------------------------------------------------------------------------------- use 5.006; use strict; use warnings; require Exporter; # exported globals goes here use vars qw($AUTOLOAD $VERSION @EXPORT @ISA @EXPORT_OK %EXPORT_TAGS); $VERSION = '1.10'; @ISA = qw(Exporter); @EXPORT = qw($VERSION); %EXPORT_TAGS = (); @EXPORT_OK = (); # Carp handle warnings use Carp qw(carp croak); # }}} # Class data and methods {{{ { # ----------------------------------------------------------------------------------------------------------------------# # List of all the attributes that came from AFFI with def value and permissions read/write/requiredi/noinit # # You can add attributes here without change the code. First value is the default value. Second value stock permissions # # we can have a SCALAR or an ANONYMOUS ARRAY as value. # # The attributes have permissions read/write (can get and can set it) # # required (you have to insert it in the new calling) # # no init (while cloning, you can't copy this attr) # # ----------------------------------------------------------------------------------------------------------------------# my %_attribute_properties = ( _probeset_id => [ '????' , 'read.required' ], # Single Probeset ID _chipcode => [ '????' , 'read.required' ], # Chip small name as into mysql table (ABBR) _genechip => [ '????' , 'read.write' ], # Genechip complete name _organism => [ '????' , 'read.write' ], # Organism _affy_date => [ '????' , 'read.write' ], # Annotation date _public_id => [ '????' , 'read.write.noinit' ], # Public_id from affy tables _seq_type => [ '????' , 'read.write.noinit' ], # Affy seq_type _seq_src => [ '????' , 'read.write.noinit' ], # Affy seq_source _trans_id => [ '????' , 'read.write.noinit' ], # Affy Cluster Transcript ID _target_des => [ '????' , 'read.write.noinit' ], # U48705 /FEATURE=mRNA /DEFINITION=HSU48705 Human receptor tyrosine kinase DDR gene, complete cds _arch_unigene => [ '????' , 'read.write.noinit' ], # Original unigene ID _description => [ '????' , 'read.write.noinit' ], # Probe method annotation description _cluster => [ '????' , 'read.write.noinit' ], # ID of the transcripts sed to annotate the probeset _assignments => [ '????' , 'read.write.noinit' ], # mRNA assignments _notes => [ '????' , 'read.write.noinit' ], # Annotation Notes _affy_align => [ ['????'] , 'read.write.noinit.array' ], # alignments from affy format (VERSION: | ALIGNMENTS: | OVERLAPS: ) _affy_note => [ ['????'] , 'read.write.noinit.array' ], # annotation affymetrix format (PUBLIC ID: | header: value) _affy_func => [ ['????'] , 'read.write.noinit.array' ], # functional annotation from affymetrix _probes => [ ['????'] , 'read.write.noinit.array' ], # List of probes id with X and Y and oligo (probe_id | X: |Y: |oligo) _locations => [ ['????'] , 'read.write.noinit.array' ], # List of matching positions with licr_id and strands (set_id | probe | licr_id | position | strand ) _cdna_locations => [ ['????'] , 'read.write.noinit.array' ], # List of matching locations with cDNA licr id _licr_info => [ ['????'] , 'read.write.noinit.array' ], # List of DISTINCT licr matching ID for this probe_set (licr_id | seq_type | gene_symbol | unigene | band) _cdna_licr_info => [ ['????'] , 'read.write.noinit.array' ], # List of DISTINCT cDNA matching this probeset _refseqposition => [ { } , 'read.write.noinit.hash' ], # RefSeq position on the genome (REFSEQ_ID => Chromosome| strand| TxStart| TxEnd| CdsStart| CdsEnd) _cdnaposition => [ { } , 'read.write.noinit.hash' ], # Cdna Position on the Genome (REFSEQ_ID => Chromosome| strand| TxStart| TxEnd| CdsStart| CdsEnd) _exons_map => [ { } , 'read.write.noinit.hash' ], # exon map created from method make_exon_map _cdna_map => [ { } , 'read.write.noinit.hash' ], # exon map or blocks for cDNA _exons_num => [ { } , 'read.write.noinit.hash' ], # Number of exons for every licr_id as ARRAY _cdna_exons_num => [ { } , 'read.write.noinit.hash' ], # Number of exons (blocks) for cDNA _abs_locations => [ { } , 'read.write.noinit.hash' ], # List of Probes locations over chromosome () per ogni REFSEQ _cdna_abs_locations => [ { } , 'read.write.noinit.hash' ] # List of ProbePairs locations over Chr for cDNA ); # ATTRIBUTES METHODS {{{ # Global counter of objects my $_count = 0; # Return a list of all attributes sub _all_attributes { keys %_attribute_properties; } # Check if a given propery is set for an attribute sub _permissions { my ($self, $attribute, $permissions) = @_; $_attribute_properties{$attribute}[1] =~ /$permissions/; } # Return the default value for a specific attribute sub _attribute_default { my ($self, $attribute) = @_; $_attribute_properties{$attribute}[0]; } # Type handler sub _check_type { my ($self,$attribute,$type) = @_; $_attribute_properties{$attribute}[1] =~ /$type/; } # Object counter sub get_count { $_count; } sub _incr_count { ++$_count; } sub _decr_count { --$_count; } # }}} } # end of the block ATTRIBUTES definition # }}} # new {{{ # # Constructor method # called from the perl.pl with $obj -> new(); # sub new { my ($class, %arg) = @_; # Create a new object two arguments: Anonymous hash, $class is the name of the class stored in by default my $self = bless {}, $class; # attribute values are set one by one with this loop foreach my $attribute ($self->_all_attributes()) { #E.g.: attribute = "_name", argument = "name" swicth between internal (_attr) and external (attr) my ($argument) = ($attribute =~ /^_(.*)/); # if argument value is given by user put it in attribute if (exists $arg{$argument}) { $self->{$attribute} = $arg{$argument}; # if is not given but required } elsif ($self -> _permissions($attribute, 'required')){ die ("No $argument attribute as required"); # else set to default } else { $self -> {$attribute} = $self -> _attribute_default($attribute); } } $class -> _incr_count(); return $self; } # }}} # clone {{{ # # Clone method # all the attr are copied from the calling objects, unless # specificaly overidden # called with: $clone_obj = $obj -> clone(); # sub clone { my ($caller, %arg) = @_; # extract class name from the calling obj my $class = ref($caller); # create a new object my $self = bless {}, $class; foreach my $attribute ($self -> _all_attributes()) { #E.g.: attribute = "_name", argument = "name" switch between internal (_attr) and external (attr) my ($argument) = ($attribute =~ /^_(.*)/); # if attr is given by user put it if (exists $arg{$argument}) { $self->{$attribute} = $arg{$argument}; # else copy the attribute from the calling object }else{ # if initialization is not allowed if ($self -> _permissions($attribute, 'noinit')) { print ("Cannot set $argument for clone: use set_$argument. Now $argument will be set as default value!\n"); $self->{$attribute} = $self -> _attribute_default($attribute); } else { $self->{$attribute} = $caller->{$attribute}; } } } $self -> _incr_count(); return $self; } # }}} # AUTOLOAD {{{ # accessors under AUTOLOAD take place of definitions as sub get_attribute {...} or sub set_attribute{...} sub AUTOLOAD { my ($self, $newvalue) = @_; my ($operation, $attribute) = ($AUTOLOAD =~ /(get|set)(_\w+)$/); # Is a legal name? unless ($operation && $attribute) { croak ("Method name $AUTOLOAD is not recognized in the class ", ref($self)); } unless (exists $self -> {$attribute}) { croak ("No such Attribute ($attribute) in the class: ", ref($self)); } # turn off strict ref for magic AUTOLOAD no strict 'refs'; # AUTOLOAD Accesors if ($operation eq 'get') { # can i read the attribute? unless ($self -> _permissions ($attribute, 'read')) { croak ("You don't have read permission for $attribute"); } # Install accessor definition in the symbol table # How AUTOLOAD work: *{$VAR} give access to the symbol definition table so, # the second time that the method get_name is called AUTOLOAD isn't called cause # get_name is saved in the symbol table *{$AUTOLOAD} = sub { my ($self) = @_; unless ($self -> _permissions($attribute, 'read')) { croak ("$attribute does not have read permission!"); } # get the attribute value # TIE-HASH # The attribute could be a scalar or a reference to an array or hash if ($self -> _check_type($attribute, 'array')) { return @{$self->{$attribute}}; } elsif ($self -> _check_type($attribute, 'hash')) { return %{$self->{$attribute}}; } else { return $self -> {$attribute}; } }; }elsif ($operation eq 'set') { # Verify if you can set the attribute unless ($self -> _permissions($attribute, 'write')) { die ("$attribute does not have write permission!"); } # set the attribute value $self -> {$attribute} = $newvalue; # Install this mutator in the symbol table *{$AUTOLOAD} = sub { my ($self, $newvalue) = @_; unless ($self -> _permissions($attribute, 'write')) { die ("$attribute does not have write permission!"); } $self -> {$attribute} = $newvalue; }; } # Turn strict refs on use strict 'refs'; # Return the attribute value return $self -> {$attribute}; } # }}} # destroy {{{ # Destoy an object that is not begin used and decrement counter sub DESTROY { my($self) = @_; $self -> _decr_count(); } # }}} # make_absolute_map {{{ # convert probe position to absolute position over genome # eccezzione: se i campi neessari sono undef da considerare sub make_absolute_map { my ($self, $refseq ,@refseq_map) = @_; my ($probeset_id,@locations,%exon_count,$definitive_exon_map) = undef; $probeset_id = $self -> get_probeset_id; # Sembra strano ma funziona! Evidentemente la prima volta che uso la chiamata GET AUTOLOAD non riesce a gestire il dato # quindi sbaglia la prima iterazione indipendentemente dal numero di oggetti # La soluzione e' semplice e sporca. Inizializza due volte locations; # double or it doesn't work my $locations = $self -> get_locations; @locations = $self -> get_locations; # double or it doesn't work my $exon_count = $self -> get_exons_num; %exon_count = $self -> get_exons_num; $definitive_exon_map = $self -> get_exons_map; if ($locations[0] eq "????") { die "Can't map probes if you don't set locations! at $!"; } # Acquire refseq map my $id = $refseq_map[0]; my $name = $refseq_map[1]; my $chrom = $refseq_map[2]; my $strand = $refseq_map[3]; my $txStart = $refseq_map[4]; my $txEnd = $refseq_map[5]; my $cdsStart = $refseq_map[6]; my $cdsEnd = $refseq_map[7]; my $exonCount = $refseq_map[8]; my $exonStarts = $refseq_map[9]; my $exonEnds = $refseq_map[10]; # Entry like NR_XXXXXX are NOT RefSeq but RNA non-coding transcripts, can map! unless ($name) { return 0; } # Set refseq location and RefSeq exon structure no warnings; my %refseq_position = $self -> get_refseqposition; # clean up undef values foreach my $key (keys %refseq_position) { if (!$refseq_position{$key}) { delete($refseq_position{$key}); } } $refseq_position{$name}[0] = [$chrom,$strand,$txStart,$txEnd,$cdsStart,$cdsEnd,$exonCount]; use warnings; # put start abd stop in another array $exonStarts =~ s/\r//; $exonEnds =~ s/\r//; my @exonstarts = split (/\,/ , $exonStarts); my @exonstops = split (/\,/ , $exonEnds); # push starts and stops in refseqposition my @exon_start_stop; my $exon_row; for (my $n = 0; $n<=$#exonstarts; $n++) { $exon_row = "$exonstarts[$n]-$exonstops[$n]"; push (@exon_start_stop, $exon_row); } $refseq_position{$name}[1] = \@exon_start_stop; $self -> set_refseqposition (\%refseq_position); # Map loop {{{ my @probes_map; for (my $m=0; $m<=$#locations; $m++) { if (defined $locations[$m][2] and $locations[$m][2] eq $refseq) { $locations[$m][3] =~ /^\[(.*)\.\.(.*)\](.*)$/; my $start = $1; my $stop = $2; my $length = $stop - $start; my $map = [$start, $stop, $length, $locations[$m][1]]; push (@probes_map, $map); } } # # probes map and refseq map are specific a put this info in an array keys is _exons_num # we can translate as: get actual exon map, verify if is init or not, set the first data or push the other data, set the new exon map. # Structure of exon map: # '_exons_num' => [ 'NM_001954: 19', 'NM_013993: 17','NM_013994: 17' ] Exon map is now resolved # #-------------------------------------------------- # my $refseq_info = "$name: $exonCount"; #-------------------------------------------------- $exon_count{$name} = $exonCount; $self -> set_exons_num ( \%exon_count); # Calculate Delta and incremental values for the Exon Map # Cases: # A. If there is ONLY one exon... # B. Too much exons lead to a bad image. Consider it into image creation my @deltas; my @incremental; my $incr; # first loop for (my $q=0; $q<=$#exonstarts; $q++) { my $delta = $exonstops[$q] - $exonstarts[$q]; push (@deltas, $delta); } # reverse if is negative strand I reverse exon order AND exon absolute mapping over chromosome if ($strand eq '-') { @deltas = reverse (@deltas); @exonstarts = reverse (@exonstarts); @exonstops = reverse (@exonstops); } # INCREMENTAL # cumulative counter for (my $q=0; $q<=$#deltas; $q++) { if ($q == 0) { $incr = $deltas[$q]; push (@incremental, $incr); } else { $incr = $incremental[$q-1] + $deltas[$q]; push (@incremental, $incr); } } # }}} # MAP PROBES OVER THE REFSEQ (ABSOLUTE AND RELATIVE) # Probe mapper loop # Absolute array for handling abs positions of probes # per questa refseq ($refseq) # Prendi l'ultimo delta esone e aggiungi la differenza tra delta e la posizione della sonda sulla refseq (Da aggiungere ad questo exon start) my @absolute; my $index; my $avanzo; my $last_exon; my $diff; my $abs_start; my $abs_stop; my $abs_row; # NOTE to generate @incremental I do the reverse of DELTA array. Now to calculate exact probe absolute position i have to revert genes in negative strand # SIETE D'ACCORDO? # Per fare meno casino ragiono sula fatto che, nel caso di geni in negative strand, facendo il reverse di exonstarts e exonsstops # riordino il rapporto tra ORDINE degli ESONI e posizioni sul cromosoma? # Case ExonCount is one (only one exon) in this case compute and exit NOW if ($exonCount == 1) { for (my $x=0; $x<=$#probes_map; $x++) { $probes_map[$x][4] = "$exonCount"; if ($strand eq "-") { # abs start is probe position more exon coordinates $abs_stop = $exonstops[0] - $probes_map[$x][0]; $abs_start = $abs_stop - $probes_map[$x][2]; } else { # abs start is probe position more exon coordinates $abs_start = $exonstarts[0] + $probes_map[$x][0]; $abs_stop = $abs_start + $probes_map[$x][2]; } # absolute row $abs_row = [$abs_start, $abs_stop, $chrom,$strand,$probes_map[$x][3]]; push (@absolute,$abs_row); } # EXONS ARE > THAN 1 } else { for (my $x=0; $x<=$#probes_map; $x++) { EXON: for (my $y=0; $y<=$#incremental; $y++) { if ($probes_map[$x][0] > $incremental[$y] and $probes_map[$x][1] > $incremental[$y]) { # my probe is not in this exon... see the next! # consider if a probe are in 3' UTR # if $y eq last @incremental we are at the last exon so probe is into the UTR 3' if ($y == $#incremental) { $diff = $probes_map[$x][0] - $incremental[$y]; if ($strand eq '-') { $abs_stop = $exonstarts[$y] - $diff; $abs_start = $abs_stop - $probes_map[$x][2]; } else { $abs_start = $exonstops[$y] + $diff; $abs_stop = $abs_start + $probes_map[$x][2]; } # absolute row $abs_row = [$abs_start, $abs_stop, $chrom,$strand,$probes_map[$x][3]]; push (@absolute, $abs_row); # relative map $probes_map[$x][4] = "UTR"; last EXON; } else { next EXON; } } if ($probes_map[$x][0] <= $incremental[$y] and $probes_map[$x][1] <= $incremental[$y]) { # my probe start and stop are in this exon # Last exon position is the previous exon # Consider if we are in the first exon: in that case ProbePair Diff is equal to Probe Pair start if ($y == 0) { if ($strand eq "-") { # abs stop $abs_stop = $exonstops[$y] - $probes_map[$x][0]; # abs start $abs_start = $abs_stop - $probes_map[$x][2]; } else { # abs start $abs_start = $exonstarts[$y] + $probes_map[$x][0]; # abs stop piu length $abs_stop = $abs_start + $probes_map[$x][2]; } } else { $index = $y - 1; if ($strand eq "-") { # differenza rispetto a questo incremental $diff = $incremental[$y] - $probes_map[$x][0]; # abs start $abs_start = $exonstarts[$y] + $diff; # abs stop $abs_stop = $abs_start + $probes_map[$x][2]; } else { $last_exon = $incremental[$index]; # differenza rispetto all'ultimo incremental $diff = $probes_map[$x][0] - $last_exon; # abs start $abs_start = $exonstarts[$y] + $diff; # abs stop piu length $abs_stop = $abs_start + $probes_map[$x][2]; } } # absolute row $abs_row = [$abs_start, $abs_stop, $chrom,$strand,$probes_map[$x][3]]; push (@absolute, $abs_row); # relative map my $w = $y + 1; $probes_map[$x][4] = "$w"; last EXON; } if ($probes_map[$x][0] <= $incremental[$y] and $probes_map[$x][1] > $incremental[$y]) { # In this case probe start in this exon but don't stop here # Here we can consider situation as probes over last exon and 3' UTR # previous exon my $z = $y + 2; # this exon my $w = $y + 1; # relative map and absolute map # if we are at the last exon... else between two exons # PAINT THIS SINGLE PROBE IN THE MIDDLE OF TWO EXONS WHILE START AND STOP are distant # Bio::Graphics take care of this render # Se questo e' l'ultimo esone... if ($y == $#incremental) { # consider the previous exon $index = $y - 1; $last_exon = $incremental[$index]; # add diff with probe start $diff = $probes_map[$x][0] - $last_exon; # abs start and stop $abs_start = $exonstarts[$y] + $diff; $abs_stop = $abs_start + $probes_map[$x][2]; # absolute row $abs_row = [$abs_start, $abs_stop, $chrom,$strand,$probes_map[$x][3]]; push (@absolute, $abs_row); # relative map $probes_map[$x][4] = "JOIN $w, UTR"; # we stay into the first exon } elsif ($y == 0) { # So in this case the probe start in the firs exon and continue to the next exon..... if ($strand eq "-") { $abs_start = $exonstops[$y] - $probes_map[$x][0]; # calculate difference between probe pair start and end of exon $diff = $incremental[$y] - $probes_map[$x][0]; # consider the next exon... $index = $y + 1; $abs_stop = $exonstops[$index] + ($probes_map[$x][2] - $diff); } else { $abs_start = $exonstarts[$y] + $probes_map[$x][0]; # calculate difference between probe pair start and end of exon $diff = $incremental[$y] - $probes_map[$x][0]; # consider the next exon... $index = $y + 1; $abs_stop = $exonstarts[$index] + ($probes_map[$x][2] - $diff); } $abs_row = [$abs_start, $abs_stop, $chrom,$strand,$probes_map[$x][3]]; push (@absolute, $abs_row); $probes_map[$x][4] = "JOIN $w, $z"; } else { # NOT FIRST EXON AND NOT IN THE LAST EXON (middle) if ($strand eq "-") { # consider the previous exon $index = $y - 1; $last_exon = $incremental[$index]; # add diff contain difference from probe start $diff = $probes_map[$x][0] - $last_exon; # ok now ABS_START is THIS exonstops - diff $abs_stop = $exonstops[$y] - $diff; # avanzo is what stay in this exon so... $avanzo = $exonstops[$y] - $abs_stop; # so Probe pair stop... here: # $w are also index of the next exon so... $abs_start = $exonstops[$w] - ($probes_map[$x][2] - $avanzo); } else { # consider the previous exon $index = $y - 1; $last_exon = $incremental[$index]; # add diff contain difference from probe start $diff = $probes_map[$x][0] - $last_exon; # abs start and stop $abs_start = $exonstarts[$y] + $diff; # avanzo to calculate what stay in the second exon so... $avanzo = $exonstops[$y] - $abs_start; # so Probe pair stop... here: # $w are also index of the next exon so... $abs_stop = $exonstarts[$w] + ($probes_map[$x][2] - $avanzo); } # absolute row $abs_row = [$abs_start, $abs_stop, $chrom,$strand,$probes_map[$x][3]]; push(@absolute, $abs_row); $probes_map[$x][4] = "JOIN $w, $z"; } last EXON; } } } } #END ELSE # DATA STORAGE # Put information about exon position and numer of probes HERE # I use this info to generate GD image my %data_storage = (); my %definitive_exon_map = $self -> get_exons_map; for (my $x=0; $x<=$#probes_map; $x++) { my $probe_match = $probes_map[$x][4]; if (exists $data_storage{$probe_match}) { $data_storage{$probe_match}++; } else { $data_storage{$probe_match} = 1; } } $definitive_exon_map{$refseq} = \%data_storage; $self -> set_exons_map (\%definitive_exon_map); # DATA STORAGE 2 (absolute map) # take actual map # clean up undef values no warnings; # turn off warnings or the next line of code give me a ugly warn! my %absolute_storage = $self -> get_abs_locations; foreach my $key (keys %absolute_storage) { if (!$absolute_storage{$key}) { delete($absolute_storage{$key}); } } # set new value $absolute_storage{$refseq} = \@absolute; # set actual map $self -> set_abs_locations (\%absolute_storage); use warnings; return 1; }; # }}} # make_cdna_absolute__map {{{ sub make_cdna_absolute_map { my ($self, $cdna ,@cdna_map) = @_; my ($probeset_id,@locations,%exon_count,$definitive_exon_map) = undef; $probeset_id = $self -> get_probeset_id; # Sembra strano ma funziona! Evidentemente la prima volta che uso la chiamata GET AUTOLOAD non riesce a gestire il dato # quindi sbaglia la prima iterazione indipendentemente dal numero di oggetti # La soluzione e' semplice e sporca. Inizializza due volte locations; no warnings; my $locations = $self -> get_cdna_locations; @locations = $self -> get_cdna_locations; my $exon_count = $self -> get_cdna_exons_num; %exon_count = $self -> get_cdna_exons_num; use warnings; $definitive_exon_map = $self -> get_cdna_map; if ($locations[0] eq "????") { die "Can't map probes if you don't set locations! at $!"; } # Acquire cdna map my $id = $cdna_map[0]; my $name = $cdna_map[1]; my $chrom = $cdna_map[2]; my $strand = $cdna_map[3]; my $txStart = $cdna_map[4]; my $txEnd = $cdna_map[5]; my $exonCount = $cdna_map[6]; my $exonStarts = $cdna_map[7]; my $exonEnds = $cdna_map[8]; # Entry like NR_XXXXXX are NOT RefSeq but RNA non-coding transcripts, can map! unless ($name) { return 0; } # Set cdna location and RefSeq exon structure no warnings; my %cdna_position = $self -> get_cdnaposition; # clean up undef values foreach my $key (keys %cdna_position) { if (!$cdna_position{$key}) { delete($cdna_position{$key}); } } $cdna_position{$name}[0] = [$chrom,$strand,$txStart,$txEnd,$exonCount]; use warnings; # put start abd stop in another array $exonStarts =~ s/\r//; $exonEnds =~ s/\r//; my @exonstarts = split (/\,/ , $exonStarts); my @exonstops = split (/\,/ , $exonEnds); # push starts and stops in cdnaposition my @exon_start_stop; my $exon_row; for (my $n = 0; $n<=$#exonstarts; $n++) { $exon_row = "$exonstarts[$n]-$exonstops[$n]"; push (@exon_start_stop, $exon_row); } $cdna_position{$name}[1] = \@exon_start_stop; $self -> set_cdnaposition (\%cdna_position); # Map loop {{{ my @probes_map; for (my $m=0; $m<=$#locations; $m++) { if (defined $locations[$m][2] and $locations[$m][2] eq $cdna) { $locations[$m][3] =~ /^\[(.*)\.\.(.*)\](.*)$/; my $start = $1; my $stop = $2; my $length = $stop - $start; my $map = [$start, $stop, $length, $locations[$m][1]]; push (@probes_map, $map); } } # # probes map and cdna map are specific a put this info in an array keys is _exons_num # we can translate as: get actual exon map, verify if is init or not, set the first data or push the other data, set the new exon map. # Structure of exon map: # '_exons_num' => [ 'NM_001954: 19', 'NM_013993: 17','NM_013994: 17' ] Exon map is now resolved # $exon_count{$name} = $exonCount; $self -> set_cdna_exons_num ( \%exon_count); # Calculate Delta and incremental values for the Exon Map # Cases: # A. If there is ONLY one exon... # B. Too much exons lead to a bad image. Consider it into image creation my @deltas; my @incremental; my $incr; # first loop for (my $q=0; $q<=$#exonstarts; $q++) { my $delta = $exonstops[$q] - $exonstarts[$q]; push (@deltas, $delta); } # reverse if is negative strand I reverse exon order AND exon absolute mapping over chromosome if ($strand eq '-') { @deltas = reverse (@deltas); @exonstarts = reverse (@exonstarts); @exonstops = reverse (@exonstops); } # cumulative counter for (my $q=0; $q<=$#deltas; $q++) { if ($q == 0) { $incr = $deltas[$q]; push (@incremental, $incr); } else { $incr = $incremental[$q-1] + $deltas[$q]; push (@incremental, $incr); } } # }}} # MAP PROBES OVER THE REFSEQ (ABSOLUTE AND RELATIVE) # Probe mapper loop # Absolute array for handling abs positions of probes # per questa refseq ($refseq) # Prendi l'ultimo delta esone e aggiungi la differenza tra delta e la posizione della sonda sulla refseq (Da aggiungere ad questo exon start) my @absolute; my $index; my $avanzo; my $last_exon; my $diff; my $abs_start; my $abs_stop; my $abs_row; # NOTE to generate @incremental I do the reverse of DELTA array. Now to calculate exact probe absolute position i have to revert genes in negative strand # SIETE D'ACCORDO? # Per fare meno casino ragiono sula fatto che, nel caso di geni in negative strand, facendo il reverse di exonstarts e exonsstops # riordino il rapporto tra ORDINE degli ESONI e posizioni sul cromosoma? # Case ExonCount is one (only one exon) in this case compute and exit NOW if ($exonCount == 1) { for (my $x=0; $x<=$#probes_map; $x++) { $probes_map[$x][4] = "$exonCount"; if ($strand eq "-") { # abs start is probe position more exon coordinates $abs_stop = $exonstops[0] - $probes_map[$x][0]; $abs_start = $abs_stop - $probes_map[$x][2]; } else { # abs start is probe position more exon coordinates $abs_start = $exonstarts[0] + $probes_map[$x][0]; $abs_stop = $abs_start + $probes_map[$x][2]; } # absolute row $abs_row = [$abs_start, $abs_stop, $chrom,$strand,$probes_map[$x][3]]; push (@absolute,$abs_row); } # EXONS ARE > THAN 1 } else { for (my $x=0; $x<=$#probes_map; $x++) { EXON: for (my $y=0; $y<=$#incremental; $y++) { if ($probes_map[$x][0] > $incremental[$y] and $probes_map[$x][1] > $incremental[$y]) { # my probe is not in this exon... see the next! # consider if a probe are in 3' UTR # if $y eq last @incremental we are at the last exon so probe is into the UTR 3' if ($y == $#incremental) { $diff = $probes_map[$x][0] - $incremental[$y]; if ($strand eq '-') { $abs_stop = $exonstarts[$y] - $diff; $abs_start = $abs_stop - $probes_map[$x][2]; } else { $abs_start = $exonstops[$y] + $diff; $abs_stop = $abs_start + $probes_map[$x][2]; } # absolute row $abs_row = [$abs_start, $abs_stop, $chrom,$strand,$probes_map[$x][3]]; push (@absolute, $abs_row); # relative map $probes_map[$x][4] = "UTR"; last EXON; } else { next EXON; } } if ($probes_map[$x][0] <= $incremental[$y] and $probes_map[$x][1] <= $incremental[$y]) { # my probe start and stop are in this exon # Last exon position is the previous exon # Consider if we are in the first exon: in that case ProbePair Diff is equal to Probe Pair start if ($y == 0) { if ($strand eq "-") { # abs stop $abs_stop = $exonstops[$y] - $probes_map[$x][0]; # abs start $abs_start = $abs_stop - $probes_map[$x][2]; } else { # abs start $abs_start = $exonstarts[$y] + $probes_map[$x][0]; # abs stop piu length $abs_stop = $abs_start + $probes_map[$x][2]; } } else { $index = $y - 1; if ($strand eq "-") { # differenza rispetto a questo incremental $diff = $incremental[$y] - $probes_map[$x][0]; # abs start $abs_start = $exonstarts[$y] + $diff; # abs stop $abs_stop = $abs_start + $probes_map[$x][2]; } else { $last_exon = $incremental[$index]; # differenza rispetto all'ultimo incremental $diff = $probes_map[$x][0] - $last_exon; # abs start $abs_start = $exonstarts[$y] + $diff; # abs stop piu length $abs_stop = $abs_start + $probes_map[$x][2]; } } # absolute row $abs_row = [$abs_start, $abs_stop, $chrom,$strand,$probes_map[$x][3]]; push (@absolute, $abs_row); # relative map my $w = $y + 1; $probes_map[$x][4] = "$w"; last EXON; } if ($probes_map[$x][0] <= $incremental[$y] and $probes_map[$x][1] > $incremental[$y]) { # In this case probe start in this exon but don't stop here # Here we can consider situation as probes over last exon and 3' UTR # previous exon my $z = $y + 2; # this exon my $w = $y + 1; # relative map and absolute map # if we are at the last exon... else between two exons # PAINT THIS SINGLE PROBE IN THE MIDDLE OF TWO EXONS WHILE START AND STOP are distant # Bio::Graphics take care of this render # Se questo e' l'ultimo esone... if ($y == $#incremental) { # consider the previous exon $index = $y - 1; $last_exon = $incremental[$index]; # add diff with probe start $diff = $probes_map[$x][0] - $last_exon; # abs start and stop $abs_start = $exonstarts[$y] + $diff; $abs_stop = $abs_start + $probes_map[$x][2]; # absolute row $abs_row = [$abs_start, $abs_stop, $chrom,$strand,$probes_map[$x][3]]; push (@absolute, $abs_row); # relative map $probes_map[$x][4] = "JOIN $w, UTR"; # we stay into the first exon } elsif ($y == 0) { # So in this case the probe start in the firs exon and continue to the next exon..... if ($strand eq "-") { $abs_start = $exonstops[$y] - $probes_map[$x][0]; # calculate difference between probe pair start and end of exon $diff = $incremental[$y] - $probes_map[$x][0]; # consider the next exon... $index = $y + 1; $abs_stop = $exonstops[$index] + ($probes_map[$x][2] - $diff); } else { $abs_start = $exonstarts[$y] + $probes_map[$x][0]; # calculate difference between probe pair start and end of exon $diff = $incremental[$y] - $probes_map[$x][0]; # consider the next exon... $index = $y + 1; $abs_stop = $exonstarts[$index] + ($probes_map[$x][2] - $diff); } $abs_row = [$abs_start, $abs_stop, $chrom,$strand,$probes_map[$x][3]]; push (@absolute, $abs_row); $probes_map[$x][4] = "JOIN $w, $z"; } else { # NOT FIRST EXON AND NOT IN THE LAST EXON (middle) if ($strand eq "-") { # consider the previous exon $index = $y - 1; $last_exon = $incremental[$index]; # add diff contain difference from probe start $diff = $probes_map[$x][0] - $last_exon; # ok now ABS_START is THIS exonstops - diff $abs_stop = $exonstops[$y] - $diff; # avanzo is what stay in this exon so... $avanzo = $exonstops[$y] - $abs_stop; # so Probe pair stop... here: # $w are also index of the next exon so... $abs_start = $exonstops[$w] - ($probes_map[$x][2] - $avanzo); } else { # consider the previous exon $index = $y - 1; $last_exon = $incremental[$index]; # add diff contain difference from probe start $diff = $probes_map[$x][0] - $last_exon; # abs start and stop $abs_start = $exonstarts[$y] + $diff; # avanzo to calculate what stay in the second exon so... $avanzo = $exonstops[$y] - $abs_start; # so Probe pair stop... here: # $w are also index of the next exon so... $abs_stop = $exonstarts[$w] + ($probes_map[$x][2] - $avanzo); } # absolute row $abs_row = [$abs_start, $abs_stop, $chrom,$strand,$probes_map[$x][3]]; push(@absolute, $abs_row); $probes_map[$x][4] = "JOIN $w, $z"; } last EXON; } } } } # DATA STORAGE # Put information about exon position and numer of probes HERE # I use this info to generate GD image my %data_storage = (); my %definitive_exon_map = $self -> get_cdna_map; for (my $x=0; $x<=$#probes_map; $x++) { my $probe_match = $probes_map[$x][4]; if (exists $data_storage{$probe_match}) { $data_storage{$probe_match}++; } else { $data_storage{$probe_match} = 1; } } $definitive_exon_map{$cdna} = \%data_storage; $self -> set_cdna_map (\%definitive_exon_map); # DATA STORAGE 2 (absolute map) # take actual map # clean up undef values no warnings; # turn off warnings or the next line of code give me a ugly warn! my %absolute_storage = $self -> get_cdna_abs_locations; foreach my $key (keys %absolute_storage) { if (!$absolute_storage{$key}) { delete($absolute_storage{$key}); } } # set new value $absolute_storage{$cdna} = \@absolute; # set actual map $self -> set_cdna_abs_locations (\%absolute_storage); # clean up undef values into cdna_exons_num my %exons_clean = $self -> get_cdna_exons_num; foreach my $clean (keys %exons_clean) { if (!$exons_clean{$clean}) { delete ($exons_clean{$clean}); } } $self -> set_cdna_exons_num(\%exons_clean); use warnings; return 1; }; # }}} 1; # Perldoc {{{ # Below is stub documentation for your module. You'd better edit it! =head1 NAME B - A module to handle I and store informations. This module follow the F OO-Perl structure, implements I. To use some methods you need to feed your object with required informations (coming from AffyDB and mySQL tables). =head1 SYNOPSIS use Probeset; my $obj= Probeset -> new ( probe_id => "1443133_at", chipcode => "ET_U133A" ); print "PROBE ID: ", $obj-> get_probe_id, "\n"; print "CHIPCODE", $obj-> get_chipcode, "\n"; $obj -> set_organism(""); $obj -> set_affy_date("1/1/2005"); my $obj2 = $obj -> clone ( probe_id => "3334567_at", chipcode => "GZ_U122B", ) print "\n\nCount is ", Probeset -> get_count(), "\n\n"; =head1 ABSTRACT This perl library uses perl5 objects to make it easy to create and query a Probeset object. Probeset.pm is a class to handle probesets of a genechip. It support objects construction, objects cloning, read/write permissions of the attributes, attributes required and attributes no init (can't be initialized after a cloning of the first object). This package defines a hash structure to store and use information about a probeset. Using a AffyDB.pm you can create and query a mySQL database, with Probeset.pm you recall this informations and use it. With methods B and B you can cross information coming from: Affymetrix, Ludwing Institute of cancer research and UCSC exon maps to retrive target sequences absolute positions on the genome and generate original data about ProbePairs position. =head1 DESCRIPTION =head2 CONSTRUCTOR AND INITIALIZATION =over =item B use Probeset; print $VERSION; Display current Module Version =item B use Probeset; $obj = Probeset -> new (...) Create a new object with two arguments: probe_id chipcode [ any other optional argument ] Attributes are checked and initialized one by one. A check id made to verify attribute permission. =item B use Probeset; $obj = Probeset -> clone ($obj_temp); Clone an esistent object, mantain only the attributes with init permission =item B Accessors under AUTOLOAD take place of definitions as sub get_attribute {...} or sub set_attribute{...} Install accessor definition in the symbol table How AUTOLOAD work: *{$VAR} give access to the symbol definition table so, the first time that method is called we call AUTOLOAD, the second time that the method get_name is called AUTOLOAD isn't called cause get_name is saved in the symbol table. =item B use Probeset; $obj -> get_probe_id(); $obj -> set_seq_src("GENBANK"); Get and Set methods depens by AUTOLOAD. AUTOLOAD verify is we want access or change data and make check on your request. After this step you retrive informations (get) or set information in the hash (set) =item B Just clean up memory and decrement Counter of objects. =back =head2 MAPPING METHODS =over =item B In order to use this method, you need to feed this object with information relatives to locations, exon_count and exon/intron structure of the RefSeq. Script example: use AffyDB; use Probeset; B my $affydb = AffyDB -> new ( mysql => 'affy:localhost', chip => "$chipcode", refseqcode => "$refseq_code" ); my $probeset_name = '1007_s_at'; B my $probeset = Probeset -> new ( probeset_id => $probeset_name, chipcode => $chipcode ); B my @locations = $affydb -> get_locations($probeset_name); my @targets = $affydb -> get_distinct_licr($probeset_name); B $probeset -> set_locations(\@locations); $probeset -> set_licr_info(\@targets); B for (my $n = 0; $n<=$#targets; $n++) { my @refseq_map = $affydb -> get_refseq_map ($targets[$n][0]); $probeset -> make_absolute_map ($targets[$n][0], @refseq_map); } =back =head2 ATTRIBUTES AND ARGUMENTS List of all the attributes that came from AFFI with def value and permissions read/write/requiredi/noinit You can add attributes in his hash without change the code. First value is the default value. Second value stock permissions and type we can have a SCALAR or an ANONYMOUS ARRAY or an HASH as value. my %_attribute_properties = ( _probeset_id => [ '????' , 'read.required' ], # Single Probeset ID _chipcode => [ '????' , 'read.required' ], # Chip small name as into mysql table (ABBR) _genechip => [ '????' , 'read.write' ], # Genechip complete name _organism => [ '????' , 'read.write' ], # Organism _affy_date => [ '????' , 'read.write' ], # Annotation date _public_id => [ '????' , 'read.write.noinit' ], # Public_id from affy tables _seq_type => [ '????' , 'read.write.noinit' ], # Affy seq_type _seq_src => [ '????' , 'read.write.noinit' ], # Affy seq_source _trans_id => [ '????' , 'read.write.noinit' ], # Affy Cluster Transcript ID _target_des => [ '????' , 'read.write.noinit' ], # U48705 /FEATURE=mRNA /DEFINITION=HSU48705 Human receptor tyrosine kinase DDR gene, complete cds _arch_unigene => [ '????' , 'read.write.noinit' ], # Original unigene ID _description => [ '????' , 'read.write.noinit' ], # Probe method annotation description _cluster => [ '????' , 'read.write.noinit' ], # ID of the transcripts sed to annotate the probeset _assignments => [ '????' , 'read.write.noinit' ], # mRNA assignments _notes => [ '????' , 'read.write.noinit' ], # Annotation Notes _affy_align => [ ['????'] , 'read.write.noinit.array' ], # alignments from affy format (VERSION: | ALIGNMENTS: | OVERLAPS: ) _affy_note => [ ['????'] , 'read.write.noinit.array' ], # annotation affymetrix format (PUBLIC ID: | header: value) _affy_func => [ ['????'] , 'read.write.noinit.array' ], # functional annotation from affymetrix _probes => [ ['????'] , 'read.write.noinit.array' ], # List of probes id with X and Y and oligo (probe_id | X: |Y: |oligo) _locations => [ ['????'] , 'read.write.noinit.array' ], # List of matching positions with licr_id and strands (set_id | probe | licr_id | position | strand ) _cdna_locations => [ ['????'] , 'read.write.noinit.array' ], # List of matching locations with cDNA licr id _licr_info => [ ['????'] , 'read.write.noinit.array' ], # List of DISTINCT licr matching ID for this probe_set (licr_id | seq_type | gene_symbol | unigene | band) _cdna_licr_info => [ ['????'] , 'read.write.noinit.array' ], # List of DISTINCT cDNA matching this probeset _refseqposition => [ { } , 'read.write.noinit.hash' ], # RefSeq position on the genome (REFSEQ_ID => Chromosome| strand| TxStart| TxEnd| CdsStart| CdsEnd) _cdnaposition => [ { } , 'read.write.noinit.hash' ], # Cdna Position on the Genome (REFSEQ_ID => Chromosome| strand| TxStart| TxEnd| CdsStart| CdsEnd) _exons_map => [ { } , 'read.write.noinit.hash' ], # exon map created from method make_exon_map _cdna_map => [ { } , 'read.write.noinit.hash' ], # exon map or blocks for cDNA _exons_num => [ { } , 'read.write.noinit.hash' ], # Number of exons for every licr_id as ARRAY _cdna_exons_num => [ { } , 'read.write.noinit.hash' ], # Number of exons (blocks) for cDNA _abs_locations => [ { } , 'read.write.noinit.hash' ], # List of Probes locations over chromosome () per ogni REFSEQ _cdna_abs_locations => [ { } , 'read.write.noinit.hash' ] # List of ProbePairs locations over Chr for cDNA ); =head2 EXPORT $VERSION =head1 SEE ALSO AffyDB.pm http://bio.ifom-firc.it/AffyDB =head1 AUTHOR Rambaldi, Edavide.rambaldi@ifom-ieo-campus.itE =head1 COPYRIGHT AND LICENSE Copyright 2005 by Rrambaldi This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut # }}}