package Splicy::AffyDB; # License and INIT {{{ # # ---------------------------------------------------------------------------------- # # A Module to handle affy annotation tables and dump informaton to a mySQL database # Copyright (C) 2005 Davide Rambaldi. # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA # # You may modify this module as you wish, but if you redistribute a modified version, # please attach a note listing the modifications you have made. # # You can contact me by e-mail: filter-drambald@ifom-ieo-campus.it # # -------------------------------------------------------------------------------------- use 5.006; use strict; use warnings; require Exporter; # exported globals goes here use vars qw($VERSION @EXPORT @ISA @EXPORT_OK %EXPORT_TAGS); $VERSION = '1.10'; @ISA = qw(Exporter); @EXPORT = qw($VERSION); %EXPORT_TAGS = (); @EXPORT_OK = (); # Carp handle warnings DBI connect to the DB use Carp qw(carp croak); use DBI; # }}} # Attributes {{{ #-------------------------------------------------- # Class data and methods #-------------------------------------------------- { # Hash of attributes with default values my %_attributes = ( _mysql => '??', # mysql database and host (affy:localhost) _dbh => '', # database handler _affyfile => '??', # Affy annotation table data src _licrfile => '??', # Ludwing Institute for Cancer Research annotation file (RefSeq) _licrrna => '??', # Ludwing Institute for Cancer Research annotation file (mRNA) _refseqfile => '??', # RefSeq exon map over DNA table _refseqcode => '??', # RefSeq code will be the name of the exon map table _cdnafile => '??', # cDNA file from UCSC _cdnacode => '??', # cDNA code will be the name of the cDNA exon map _chip => '??', # genechip name for create probe table _summary => '??', # summary (0-1) _user => '??', # username _password => '??', # password _probepairs => '??', # Table FILE Of Diagnostics probe Pairs (RefSeq) _rnaprobepairs => '??', # Table FILE Of Dia PP (mRNA) _hisoforms => '??' # Human table FILE of ISOFORM generated by Barbara ); # Return a list of all's attributes sub _all_attributes { keys %_attributes; } } # }}} # new {{{ # ---------------------------------------------------------------------------------- # The constructor methods # ---------------------------------------------------------------------------------- sub new { my ($class, %arg) = @_; # create new object my $self = bless {}, $class; # set the attributes for the provided args foreach my $attribute ($self -> _all_attributes()) { # switch between attribute and arguments # where $attribute = _name and $arguments = name my($argument) = ($attribute =~ /^_(.*)/); if (exists $arg{$argument}) { $self -> {$attribute} = $arg{$argument}; } } # mySQL is required unless ($arg{mysql}) { croak ("No mySQL host:database specified\n"); } # Username and password are required unless ($arg{user} and $arg{password}) { croak ("No mySQL username and password specified\n"); } # chip name is required unless ($arg{chip}) { croak ("No chip name specified, to insert exon maps use chipcode 'test'\n"); } # Connect to the affy database # dbi:driver:host:database, username, password my $dbh; my $user = $arg{user}; my $passwd = $arg{password}; unless ($dbh = DBI -> connect("dbi:mysql:$arg{mysql}", $user, $passwd)) { croak ("Can't connect to the mySQL database error is: $!"); } $self -> setDBhandle($dbh); # if a affy_table is given as arg. # Populate the database with the affy info if ($arg{affyfile}) { my $affy = $self -> _parse_affy(); if ($affy == 1) { print "Creation of the new affy tables complete!\n"; } else { print "Creation of new affy tables failed\n"; } } # if licrfile is given parse licr files # Populate the database with licr infos if ($arg{licrfile}) { my $licr = $self -> _parse_licr(); if ($licr ==1) { print "Creation of the new licr tables complete!\n"; } else { print "Creation of new licr tables failed\n"; } } # if refseqfile is given, parse UCSC exon map # and populate the database if ($arg{refseqfile}) { my $exon_map = $self -> _parse_refseq_exonmap(); if ($exon_map ==1) { print "Creation of the new RefSeq tables complete!\n"; } else { print "Creation of new RefSeq tables failed\n"; } } # if cDNA file is given, parse UCSC exon map # and populate the database if ($arg{cdnafile}) { my $exon_map = $self -> _parse_cdna_exonmap(); if ($exon_map ==1) { print "Creation of the new cDNA tables complete!\n"; } else { print "Creation of new cDNA tables failed\n"; } } # if licrfile(RNA) is given parse file # Populate the database with licr infos if ($arg{licrrna}) { my $licr = $self -> _parse_licrrna(); if ($licr ==1) { print "Creation of the new RNA tables complete!\n"; } else { print "Creation of new RNA tables failed\n"; } } # if probepairs(RefSeq) is given parse file # Populate the database with diagnostics ProbePairs if ($arg{probepairs}) { my $pp = $self -> _parse_probepairs(); if ($pp ==1) { print "Creation of the new ProbePiars-RefSeq tables complete!\n"; } else { print "Creation of new ProbePairs-RefSeq tables failed\n"; } } # if probepairs(mRNA) is given parse file # Populate the database with diagnostics ProbePairs if ($arg{rnaprobepairs}) { my $pp = $self -> _parse_rnaprobepairs(); if ($pp ==1) { print "Creation of the new ProbePiars-mRNA tables complete!\n"; } else { print "Creation of new ProbePairs-mRNA tables failed\n"; } } # if Isoforms is given parse file # Populate the database if ($arg{hisoforms}) { my $pp = $self -> _parse_hisoforms(); if ($pp ==1) { print "Creation of the new Human Isoforms tables complete!\n"; } else { print "Creation of new Human Isoforms tables failed\n"; } } return $self; } # }}} # mySQL_handler {{{ # --------------------------------------------------------------------------------- # # No set methods, all is made by the new constructor class # methods here hanlde SQL connection and Arguments of AffyDB object. # # HANDLERS # # --------------------------------------------------------------------------------- sub getDBhandle { my ($self) = @_; return $self -> {_dbh}; } sub setDBhandle { my ($self,$dbh) = @_; return $self -> {_dbh} = $dbh; } sub get_affyfile { my($self) = @_; return $self -> {_affyfile}; } sub get_licrfile { my ($self) = @_; return $self -> {_licrfile}; } sub get_licrrna { my ($self) = @_; return $self -> {_licrrna}; } sub get_refseqfile { my ($self) = @_; return $self -> {_refseqfile}; } sub get_probepairs { my ($self) = @_; return $self -> {_probepairs}; } sub get_rnaprobepairs { my ($self) = @_; return $self -> {_rnaprobepairs}; } sub get_hisoforms{ my ($self) = @_; return $self -> {_hisoforms}; } sub get_refseqcode { my ($self) = @_; my $refseqcode = $self -> {_refseqcode}; $refseqcode =~ s/-/_/gm; return $refseqcode; } sub get_cdnacode { my ($self) = @_; my $cdnacode = $self -> {_cdnacode}; $cdnacode =~ s/-/_/gm; return $cdnacode; } sub get_cdnafile { my ($self) = @_; return $self -> {_cdnafile}; } sub getChipName { my($self) = @_; my $chip_name = $self -> {_chip}; $chip_name =~ s/-/_/gm; return $chip_name; } sub getSummary { my($self) = @_; return $self -> {_summary}; } sub affy_disconnect { my ($self) = @_; my $dbh = $self -> getDBhandle(); $dbh -> disconnect(); return 1; } # }}} # DROP METHOD {{{ sub drop { my ($self, $tables) = @_; my $chip = $self -> getChipName(); my $dbh = $self -> getDBhandle(); my $drop_string = 'DROP TABLE '. $tables; my $rv = $dbh -> do($drop_string); return 1; } # }}} # GET's {{{ # ------------------------------------------------------------------------------------- # GET methods (take informations, make queries...) # NOW UPDATED WITH OUTPUT OPTION (HTML o REFERENCES) # ------------------------------------------------------------------------------------- # SECTION A (take probesets names) {{{ #-------------------------------------------------- # FREE QUERY {{{ # Just write your mySQl query! #-------------------------------------------------- sub freequery { my ($self, $query) = @_; my $chip = $self -> getChipName(); my $dbh = $self -> getDBhandle(); my $sth = $dbh -> prepare ($query); $sth -> execute(); my @result; while (my $row = $sth -> fetchrow_arrayref) { push (@result, @$row); } return @result; } # }}} #-------------------------------------------------- # get 1 probeset from set_id {{{ #-------------------------------------------------- sub get_probeset { my ($self, $probe) = @_; my $chip = $self -> getChipName(); my $probeset_table_name = $chip; my $dbh = $self -> getDBhandle(); my $sth = $dbh -> prepare ( "select * from $probeset_table_name where set_id=?" ); $sth -> execute($probe); my @probes; while (my $row = $sth -> fetchrow_arrayref) { push (@probes, @$row); } return @probes; } # }}} #-------------------------------------------------- # get probeset list from public_id {{{ #-------------------------------------------------- sub get_public_id { my ($self, $public_id) = @_; my $chip = $self -> getChipName(); my $probeset_table_name = $chip; my $dbh = $self -> getDBhandle(); my $sth = $dbh -> prepare ( "select DISTINCT set_id from $probeset_table_name where public_id=?" ); $sth -> execute($public_id); my @probes; while (my $row = $sth -> fetchrow_arrayref) { push (@probes, @$row); } return @probes; } # }}} #-------------------------------------------------- # GET all probeset that MATCH for a specific REFSEQ {{{ #-------------------------------------------------- sub get_matching_probeset { my($self,$refseq_id) = @_; my $chip = $self -> getChipName(); my $probes_table_name = $chip.'probes'; my $dbh = $self -> getDBhandle(); my $sth = $dbh -> prepare ( "SELECT DISTINCT set_id FROM $probes_table_name WHERE licr_id=?" ); $sth -> execute($refseq_id); my @probes; while (my $row = $sth -> fetchrow_arrayref) { push (@probes, @$row); } return @probes; } # }}} #-------------------------------------------------- # GET all probeset that MATCH for a specific GENE {{{ #-------------------------------------------------- sub get_matching_genes { my($self,$gene) = @_; my $chip = $self -> getChipName(); my $affynote_table_name = $chip.'affynote'; my $dbh = $self -> getDBhandle(); my $sth = $dbh -> prepare ( "SELECT DISTINCT set_id FROM $chip, $affynote_table_name WHERE gene_symbol LIKE ? AND $affynote_table_name.public_id=$chip.public_id" ); # add wildcards $gene =~ s/(.*)/%$1%/; $sth -> execute($gene); my @probes; while (my $row = $sth -> fetchrow_arrayref) { push (@probes, @$row); } return @probes; } # }}} # GET GENE LINK {{{ #-------------------------------------------------- sub get_entrez_link { my($self,$probeset) = @_; my $chip = $self -> getChipName(); my $affynote_table_name = $chip.'affynote'; my $dbh = $self -> getDBhandle(); my $sth = $dbh -> prepare ( "SELECT locuslink FROM $chip, $affynote_table_name WHERE set_id=? AND $affynote_table_name.public_id=$chip.public_id" ); # add wildcards $sth -> execute($probeset); my @probes; while (my $row = $sth -> fetchrow_arrayref) { push (@probes, @$row); } return @probes; } # }}} #-------------------------------------------------- # GET all probeset of a chip {{{ #-------------------------------------------------- sub get_all_probeset { my ($self) = @_; my $chipcode = $self -> getChipName(); my $probeset_table_name = $chipcode; my $dbh = $self -> getDBhandle(); my $sth = $dbh -> prepare ( "SELECT set_id FROM $probeset_table_name" ); $sth -> execute(); my @probes; while (my $row = $sth -> fetchrow_arrayref) { push (@probes,@$row); } return @probes; } # }}} # }}} # SECTION B (take informations) {{{ #-------------------------------------------------- # GET information about the affy design of a specific probeset {{{ # select testdesign.public_id,seq_type,seq_source,target_des,arch_unigene,trans_id from testdesign join test where test.set_id='1415670_at' #-------------------------------------------------- sub get_probeset_design { my ($self, $probe) = @_; my $chip = $self -> getChipName(); my $probeset_table_name = $chip; my $design_table_name = $probeset_table_name.'design'; my $dbh = $self -> getDBhandle(); my $sth = $dbh -> prepare ( "SELECT $design_table_name.public_id,seq_type,seq_source,target_des,arch_unigene,trans_id, $probeset_table_name.description, cluster, assignments, notes FROM $probeset_table_name, $design_table_name WHERE $probeset_table_name.set_id=? AND $probeset_table_name.public_id = $design_table_name.public_id" ); $sth -> execute($probe); my @info; while (my $row = $sth -> fetchrow_arrayref) { push (@info, @$row); } return @info; } # }}} #-------------------------------------------------- # GET chip information {{{ #-------------------------------------------------- sub get_chip { my ($self) = @_; my $chip = $self -> getChipName(); my $dbh = $self -> getDBhandle(); my $sth = $dbh -> prepare ( "SELECT * FROM genechip_array WHERE chipcode=?" ); $sth -> execute($chip); my @info; while (my $row = $sth -> fetchrow_arrayref) { push (@info,@$row); } return @info; } # }}} # GET oligo by ID {{{ # for javascript POP up #-------------------------------------------------- sub get_one_oligo { my ($self,$probe_id) = @_; my $chip = $self -> getChipName(); my $probes_table_name = $chip.'probes'; my $dbh = $self -> getDBhandle(); my $sth = $dbh -> prepare ( "SELECT set_id, x, y, oligo, position FROM $probes_table_name WHERE probe_id=?" ); $sth -> execute ($probe_id); my @oligos; while (my $row = $sth -> fetchrow_arrayref) { push(@oligos, @$row); } return @oligos; } # }}} # GET oligo matching mRNA by ID {{{ # for javascript POP up #-------------------------------------------------- sub get_one_rna_oligo { my ($self,$probe_id) = @_; my $chip = $self -> getChipName(); my $probes_table_name = $chip.'RNAprobes'; my $dbh = $self -> getDBhandle(); my $sth = $dbh -> prepare ( "SELECT set_id, x, y, oligo, position FROM $probes_table_name WHERE probe_id=?" ); $sth -> execute ($probe_id); my @oligos; while (my $row = $sth -> fetchrow_arrayref) { push(@oligos, @$row); } return @oligos; } # }}} # GET oligo by X and Y {{{ # for Splice TABLES #-------------------------------------------------- sub get_xy_oligo { my ($self,$probeset, $x, $y) = @_; my $chip = $self -> getChipName(); my $probes_table_name = $chip.'probes'; my $dbh = $self -> getDBhandle(); my $sth = $dbh -> prepare ( "SELECT DISTINCT set_id, x, y, oligo, strand FROM $probes_table_name WHERE set_id=? AND x=? AND y=?" ); $sth -> execute ($probeset, $x, $y); my @oligos; while (my $row = $sth -> fetchrow_arrayref) { push(@oligos, @$row); } return @oligos; } # }}} # GET oligo by X and Y {{{ # for Splice TABLES #-------------------------------------------------- sub get_cdna_xy_oligo { my ($self,$probeset, $x, $y) = @_; my $chip = $self -> getChipName(); my $probes_table_name = $chip.'RNAprobes'; my $dbh = $self -> getDBhandle(); my $sth = $dbh -> prepare ( "SELECT DISTINCT set_id, x, y, oligo, strand FROM $probes_table_name WHERE set_id=? AND x=? AND y=?" ); $sth -> execute ($probeset, $x, $y); my @oligos; while (my $row = $sth -> fetchrow_arrayref) { push(@oligos, @$row); } return @oligos; } # }}} #-------------------------------------------------- # GET oligos count {{{ #-------------------------------------------------- sub get_oligos_count { my ($self,$set_id) = @_; my $chip = $self -> getChipName(); my $probes_table_name = $chip.'probes'; my $dbh = $self -> getDBhandle(); my $sth = $dbh -> prepare ( "SELECT count(DISTINCT oligo) FROM $probes_table_name WHERE set_id=?" ); $sth -> execute ($set_id); my $result; while (my $row = $sth -> fetchrow_array) { $result = $row; } return $result; } # }}} #-------------------------------------------------- # GET oligos {{{ # UPDATED to REF #-------------------------------------------------- sub get_oligos { my ($self,$set_id) = @_; my $chip = $self -> getChipName(); my $probes_table_name = $chip.'probes'; my $dbh = $self -> getDBhandle(); my $sth = $dbh -> prepare ( "SELECT DISTINCT oligo, set_id, x, y,strand FROM $probes_table_name WHERE set_id=?" ); $sth -> execute ($set_id); my @oligos; while (my $row = $sth -> fetchrow_arrayref) { my @oligorow = @$row; push(@oligos, \@oligorow); } return @oligos; } # }}} #-------------------------------------------------- # GET oligos_html {{{ #-------------------------------------------------- sub get_oligos_html { my ($self,$set_id) = @_; my $chip = $self -> getChipName(); my $probes_table_name = $chip.'probes'; my $dbh = $self -> getDBhandle(); my $sth = $dbh -> prepare ( "SELECT DISTINCT oligo, set_id, x, y FROM $probes_table_name WHERE set_id=?" ); $sth -> execute ($set_id); my @oligos; while (my $row = $sth -> fetchrow_arrayref) { my $oligorow = "(@$row[2],@$row[3]) @$row[0]"; push(@oligos, $oligorow); } return @oligos; } # }}} #-------------------------------------------------- # GET all probes-matchRefSeq of a specific probe_set with X and Y {{{ # Entry struct: probe_id | X: | Y: | oligo # UPDATED TO REF #-------------------------------------------------- sub get_probes { my ($self,$set_id) = @_; my $chip = $self -> getChipName(); my $probes_table_name = $chip.'probes'; my $dbh = $self -> getDBhandle(); my $sth = $dbh -> prepare ( "SELECT oligo, set_id, x, y, strand,licr_id,probe_id FROM $probes_table_name WHERE set_id=?" ); $sth -> execute($set_id); my @probes; while (my $row = $sth -> fetchrow_arrayref) { #my $proberow = "PROBE_ID:@$row[0] | "."X:@$row[1] | "."Y:@$row[2] | "."@$row[3]"; my @proberow = @$row; push (@probes,\@proberow); } return @probes; } # }}} #-------------------------------------------------- # GET all probes-matchRefSeq of a specific probe_set with X and Y {{{ # WITH LOCATIONS #-------------------------------------------------- sub get_probes_loci { my ($self,$set_id) = @_; my $chip = $self -> getChipName(); my $probes_table_name = $chip.'probes'; my $dbh = $self -> getDBhandle(); my $sth = $dbh -> prepare ( "SELECT oligo, set_id, x, y, strand,licr_id,probe_id,position FROM $probes_table_name WHERE set_id=?" ); $sth -> execute($set_id); my @probes; while (my $row = $sth -> fetchrow_arrayref) { #my $proberow = "PROBE_ID:@$row[0] | "."X:@$row[1] | "."Y:@$row[2] | "."@$row[3]"; my @proberow = @$row; push (@probes,\@proberow); } return @probes; } # }}} #-------------------------------------------------- # GET all probes-matchRefSeq of a specific probe_set with X and Y {{{ # WITH LOCATIONS #-------------------------------------------------- sub get_cdna_probes_loci { my ($self,$set_id) = @_; my $chip = $self -> getChipName(); my $probes_table_name = $chip.'RNAprobes'; my $dbh = $self -> getDBhandle(); my $sth = $dbh -> prepare ( "SELECT oligo, set_id, x, y, strand,licr_id,probe_id,position FROM $probes_table_name WHERE set_id=?" ); $sth -> execute($set_id); my @probes; while (my $row = $sth -> fetchrow_arrayref) { #my $proberow = "PROBE_ID:@$row[0] | "."X:@$row[1] | "."Y:@$row[2] | "."@$row[3]"; my @proberow = @$row; push (@probes,\@proberow); } return @probes; } # }}} #-------------------------------------------------- # GET all matching CDNA positions for a specific probeset {{{ #-------------------------------------------------- sub get_cdna_locations { my ($self, $set_id) = @_; my $chip = $self -> getChipName(); my $probes_table_name = $chip.'RNAprobes'; my $dbh = $self -> getDBhandle(); my $sth = $dbh -> prepare ( "SELECT set_id,probe_id,licr_id, position, strand FROM $probes_table_name WHERE set_id=?" ); $sth -> execute($set_id); my @locations; while (my $row = $sth -> fetchrow_arrayref) { #my $locationrow = "PROBE_ID:@$row[1] | PROBESET_ID:@$row[0] | PUBLIC_ID:@$row[2] | POSITION:@$row[3] | STRAND:@$row[4]"; my @locations_row = @$row; push (@locations, \@locations_row); } return @locations; } # }}} #-------------------------------------------------- # GET(UPDATED) all matching positions for a specific probeset {{{ #-------------------------------------------------- sub get_locations { my ($self, $set_id) = @_; my $chip = $self -> getChipName(); my $probes_table_name = $chip.'probes'; my $dbh = $self -> getDBhandle(); my $sth = $dbh -> prepare ( "SELECT set_id,probe_id,licr_id, position, strand FROM $probes_table_name WHERE set_id=?" ); $sth -> execute($set_id); my @locations; while (my $row = $sth -> fetchrow_arrayref) { #my $locationrow = "PROBE_ID:@$row[1] | PROBESET_ID:@$row[0] | PUBLIC_ID:@$row[2] | POSITION:@$row[3] | STRAND:@$row[4]"; my @locations_row = @$row; push (@locations, \@locations_row); } return @locations; } # }}} #-------------------------------------------------- # GET licrinfo of a specific probe {{{ #-------------------------------------------------- sub get_probe_info { my ($self, $probe_id) = @_; my $chip = $self -> getChipName(); my $probes_table_name = $chip.'probes'; my $dbh = $self -> getDBhandle(); my $sth = $dbh -> prepare ( "SELECT * FROM $probes_table_name WHERE probe_id=?" ); $sth -> execute($probe_id); my @probe; while (my $row = $sth -> fetchrow_arrayref) { push (@probe, @$row); } return @probe; } # }}} #-------------------------------------------------- # GET(UPDATED) distinct licr_id matching a specific probeset {{{ # SELECT DISTINCT * FROM HG_U133Aprobes, HG_U133Alicrinfo WHERE HG_U133Aprobes.set_id='1007_s_at' AND HG_U133Aprobes.licr_id=HG_U133Alicrinfo.licr_id; #-------------------------------------------------- sub get_distinct_licr { my ($self, $set_id) = @_; my $chip = $self -> getChipName(); my $probes_table_name = $chip.'probes'; my $licr_table_name = $chip.'licrinfo'; my $dbh = $self -> getDBhandle(); my $sth = $dbh -> prepare ( "SELECT DISTINCT $licr_table_name.licr_id,seq_type,gene_symbol,unigene,band FROM $probes_table_name, $licr_table_name WHERE $probes_table_name.set_id=? AND $probes_table_name.licr_id=$licr_table_name.licr_id" ); $sth -> execute($set_id); my @licr_id; while (my $row = $sth -> fetchrow_arrayref) { #my $licrow = "LICR_ID: @$row[0] | SEQ_TYPE: @$row[1] | GENE_SYMBOL: @$row[2] | UNIGENE: @$row[3] | BAND: @$row[4]"; my @licr_row = @$row; push (@licr_id, \@licr_row); } return @licr_id; } # }}} #-------------------------------------------------- # CDNA distinct licr_id matching a specific probeset {{{ #-------------------------------------------------- sub get_cdna_distinct_licr { my ($self, $set_id) = @_; my $chip = $self -> getChipName(); my $probes_table_name = $chip.'RNAprobes'; my $licr_table_name = $chip.'RNAlicrinfo'; my $dbh = $self -> getDBhandle(); my $sth = $dbh -> prepare ( "SELECT DISTINCT $licr_table_name.licr_id,seq_type,gene_symbol,unigene,band FROM $probes_table_name, $licr_table_name WHERE $probes_table_name.set_id=? AND $probes_table_name.licr_id=$licr_table_name.licr_id" ); $sth -> execute($set_id); my @licr_id; while (my $row = $sth -> fetchrow_arrayref) { #my $licrow = "LICR_ID: @$row[0] | SEQ_TYPE: @$row[1] | GENE_SYMBOL: @$row[2] | UNIGENE: @$row[3] | BAND: @$row[4]"; my @licr_row = @$row; push (@licr_id, \@licr_row); } return @licr_id; } # }}} #-------------------------------------------------- # GET(HTML) distinct licr_id matching a specific probeset {{{ #-------------------------------------------------- sub get_distinct_licr_html { my ($self, $set_id) = @_; my $chip = $self -> getChipName(); my $probes_table_name = $chip.'probes'; my $licr_table_name = $chip.'licrinfo'; my $dbh = $self -> getDBhandle(); my $sth = $dbh -> prepare ( "SELECT DISTINCT $licr_table_name.licr_id,seq_type,gene_symbol,unigene,band FROM $probes_table_name, $licr_table_name WHERE $probes_table_name.set_id=? AND $probes_table_name.licr_id=$licr_table_name.licr_id" ); $sth -> execute($set_id); my @licr_id; while (my $row = $sth -> fetchrow_arrayref) { my $licrow = "
LICR_ID:@$row[0]
SEQ_TYPE:@$row[1]
GENE_SYMBOL:@$row[2]
UNIGENE:@$row[3]
BAND:@$row[4]
"; push (@licr_id, $licrow); } return @licr_id; } # }}} #-------------------------------------------------- # GET all affy alignments for a specific probeset {{{ # UPDATED To REF #-------------------------------------------------- sub get_affy_alignments { my ($self, $set_id) = @_; my $chip = $self -> getChipName(); my $affyinfo_tablename = $chip.'affyinfo'; my $dbh = $self -> getDBhandle(); my $sth = $dbh -> prepare ( "SELECT * FROM $affyinfo_tablename WHERE set_id=?" ); $sth -> execute ($set_id); my @affy_align; while (my $row = $sth -> fetchrow_arrayref) { #my $alignrow = "GENOME VERSION: @$row[1] | ALIGNMENTS: @$row[2]"; my @alignrow = @$row; push (@affy_align, \@alignrow); } return @affy_align; } # }}} #-------------------------------------------------- # GET(HTML) all affy alignments for a specific probeset {{{ # UPDATED to version Dec 2004 #-------------------------------------------------- sub get_affy_alignments_html { my ($self, $set_id) = @_; my $chip = $self -> getChipName(); my $affyinfo_tablename = $chip.'affyinfo'; my $dbh = $self -> getDBhandle(); my $sth = $dbh -> prepare ( "SELECT * FROM $affyinfo_tablename WHERE set_id=?" ); $sth -> execute ($set_id); my @affy_align; while (my $row = $sth -> fetchrow_arrayref) { my $alignrow = "GENOME VERSION@$row[1]ALIGNMENTS@$row[2]"; push (@affy_align, $alignrow); } return @affy_align; } # }}} #-------------------------------------------------- # GET affy notes for a probeset_id {{{ # updated added cluster_type and new columns # UPDATED TO REF #-------------------------------------------------- sub get_affy_note { my ($self, $set_id) = @_; my $chip = $self -> getChipName(); my $affynote_tablename = $chip.'affynote'; my $dbh = $self -> getDBhandle(); my $sth = $dbh -> prepare ( "SELECT $affynote_tablename.* FROM $chip,$affynote_tablename WHERE $chip.set_id=? AND $chip.public_id=$affynote_tablename.public_id" ); $sth -> execute($set_id); my @affynote; while (my $row = $sth -> fetchrow_arrayref) { #my $affyrow = "PUBLIC ID: @$row[0] | GENE_SYMBOL: @$row[1] | GENE_TITLE: @$row[2] | CHR_LOCATION: @$row[3] | UNIGENE: @$row[4] | CLUSTER TYPE: @$row[5] | ENSEMBL: @$row[6] | LOCUSLINK: @$row[7] | SWISSPROT: @$row[8] | EC: @$row[9] | OMIM: @$row[10] | REFSEQ_PROT: @$row[11] | REFSEQ_TRAN: @$row[12] | FLYBASE: @$row[13] | AGI: @$row[14] | WORMBASE: @$row[15] | MGI: @$row[16] | RGD: @$row[17] | SGD: @$row[18]"; push (@affynote, @$row); } return @affynote; } # }}} #-------------------------------------------------- # GET(HTML) affy notes for a probeset_id {{{ # added cluster_type and new columns #-------------------------------------------------- sub get_affy_note_html { my ($self, $set_id) = @_; my $chip = $self -> getChipName(); my $affynote_tablename = $chip.'affynote'; my $dbh = $self -> getDBhandle(); my $sth = $dbh -> prepare ( "SELECT $affynote_tablename.* FROM $chip,$affynote_tablename WHERE $chip.set_id=? AND $chip.public_id=$affynote_tablename.public_id" ); $sth -> execute($set_id); my @affynote; while (my $row = $sth -> fetchrow_arrayref) { my $affyrow = "PUBLIC ID@$row[0]GENE_SYMBOL@$row[1]GENE_TITLE@$row[2]CHR_LOCATION@$row[3]UNIGENE@$row[4]UNIGENE TYPE@$row[5]ENSEMBL@$row[6]LOCUSLINK@$row[7]SWISSPROT@$row[8]EC@$row[9]OMIM@$row[10]REFSEQ_PROT@$row[11]REFSEQ_TRAN@$row[12]FLYBASE@$row[13]AGI@$row[14]WORMBASE@$row[15]MGI@$row[16]RGD@$row[17]SGD@$row[18]"; push (@affynote, $affyrow); } return @affynote; } # }}} #-------------------------------------------------- # GET affy funcional annotation for a specific probeset_id {{{ # UPDATED TO REF #-------------------------------------------------- sub get_affy_func { my ($self, $set_id) = @_; my $chip = $self -> getChipName(); my $affyfunc_tablename = $chip.'affyfunc'; my $dbh = $self -> getDBhandle(); my $sth = $dbh -> prepare ( "SELECT $affyfunc_tablename.* FROM $chip, $affyfunc_tablename WHERE set_id=? AND $chip.public_id= $affyfunc_tablename.public_id" ); $sth -> execute($set_id); my @affyfunc; while (my $row = $sth -> fetchrow_arrayref) { #my $affynoterow = "PUBLIC ID: @$row[0] | GO BIO: @$row[1] | GO CELL: @$row[2] | GO MOL: @$row[3] | PATHWAY: @$row[4] | PROT FAM: @$row[5] | PROT DOM: @$row[6] | INTERPRO: @$row[7] | MEMBRANE: @$row[8] | QTL: @$row[9]"; push (@affyfunc, @$row); } return @affyfunc; } # }}} #-------------------------------------------------- # GET(HTML) affy funcional annotation for a specific probeset_id {{{ #-------------------------------------------------- sub get_affy_func_html { my ($self, $set_id) = @_; my $chip = $self -> getChipName(); my $affyfunc_tablename = $chip.'affyfunc'; my $dbh = $self -> getDBhandle(); my $sth = $dbh -> prepare ( "SELECT $affyfunc_tablename.* FROM $chip, $affyfunc_tablename WHERE set_id=? AND $chip.public_id= $affyfunc_tablename.public_id" ); $sth -> execute($set_id); my @affyfunc; while (my $row = $sth -> fetchrow_arrayref) { my $affynoterow = "PUBLIC ID@$row[0]GO BIO@$row[1]GO CELL@$row[2]GO MOL@$row[3]PATHWAY@$row[4]PROT FAM@$row[5]PROT DOM@$row[6]INTERPRO@$row[7]MEMBRANE@$row[8]QTL@$row[9]"; push (@affyfunc, $affynoterow); } return @affyfunc; } # }}} # GET chromosome map for a refseq {{{ sub get_refseq_map { my ($self, $name) = @_; my $dbh = $self -> getDBhandle(); my $refseqcode = $self -> get_refseqcode(); if (!$refseqcode) { die "Can't make map without a refseq code! Error is: $!"; } my $sth = $dbh -> prepare ( "SELECT * FROM $refseqcode WHERE name=?" ); $sth -> execute ($name); my @refseq_map; while (my $row = $sth -> fetchrow_arrayref) { push (@refseq_map, @$row); } return @refseq_map; } # }}} # GET chromosome map for a cDNA {{{ sub get_cdna_map { my ($self, $name) = @_; my $dbh = $self -> getDBhandle(); my $cdnacode = $self -> get_cdnacode(); if (!$cdnacode) { die "Can't make map without a cdna code! Error is: $!"; } my $sth = $dbh -> prepare ( "SELECT * FROM $cdnacode WHERE name=?" ); $sth -> execute ($name); my @cdna_map; while (my $row = $sth -> fetchrow_arrayref) { push (@cdna_map, @$row); } return @cdna_map; } # }}} # GET Isoforms {{{ sub get_isoform { my ($self, $name) = @_; my $dbh = $self -> getDBhandle(); my $refseqcode = $self -> get_refseqcode(); my $isoformcode = $refseqcode.'isoforms'; if (!$refseqcode) { die "Can't take isoforms infos without a refseq code! Error is: $!"; } my $sth = $dbh -> prepare ( "SELECT gene FROM $isoformcode WHERE refseq_id LIKE ?" ); $name .= '%'; $sth -> execute ($name); my @refseq_map; while (my $row = $sth -> fetchrow_arrayref) { push (@refseq_map, @$row); } return @refseq_map; } # }}} # }}} # SECTION C (splice targets) {{{ #-------------------------------------------------- # SPLICE PROBES {{{ # only xxxx_s_at that have more than one target. #-------------------------------------------------- sub get_spliceprobes { my ($self) = @_; my $chip = $self -> getChipName(); my $dbh = $self -> getDBhandle(); my $target_table = $chip.'targets'; my $sth = $dbh -> prepare ( "SELECT * FROM $target_table WHERE set_id LIKE '%_s_at' AND target_number > 1" ); $sth -> execute(); my @result; while (my $row = $sth -> fetchrow_arrayref) { my @splice_probe = @$row; push (@result, \@splice_probe); } return @result; } # }}} #-------------------------------------------------- # NORMAL PROBES {{{ # only xxxx_at that have more than one target. #-------------------------------------------------- sub get_normalprobes { my ($self) = @_; my $chip = $self -> getChipName(); my $dbh = $self -> getDBhandle(); my $target_table = $chip.'targets'; my $sth = $dbh -> prepare ( "SELECT * FROM $target_table WHERE set_id REGEXP '^[0-9]*_at' AND target_number > 1" ); $sth -> execute(); my @result; while (my $row = $sth -> fetchrow_arrayref) { my @splice_probe = @$row; push (@result, \@splice_probe); } return @result; } # }}} #-------------------------------------------------- # X PROBES {{{ # only xxxx_at that have more than one target. #-------------------------------------------------- sub get_xprobes { my ($self) = @_; my $chip = $self -> getChipName(); my $dbh = $self -> getDBhandle(); my $target_table = $chip.'targets'; my $sth = $dbh -> prepare ( "SELECT * FROM $target_table WHERE set_id LIKE '%_x_at' AND target_number > 1" ); $sth -> execute(); my @result; while (my $row = $sth -> fetchrow_arrayref) { my @splice_probe = @$row; push (@result, \@splice_probe); } return @result; } # }}} #-------------------------------------------------- # ALL PROBES {{{ # all probes that have more than one target. #-------------------------------------------------- sub get_allprobes { my ($self) = @_; my $chip = $self -> getChipName(); my $dbh = $self -> getDBhandle(); my $target_table = $chip.'targets'; my $sth = $dbh -> prepare ( "SELECT * FROM $target_table WHERE target_number > 1" ); $sth -> execute(); my @result; while (my $row = $sth -> fetchrow_arrayref) { my @splice_probe = @$row; push (@result, \@splice_probe); } return @result; } # }}} #-------------------------------------------------- # PROBE PAIRS targets {{{ #-------------------------------------------------- sub get_diaprobepair { my ($self, $set_id, $x, $y) = @_; my $chip = $self -> getChipName(); my $dbh = $self -> getDBhandle(); my $target_table = $chip.'RefSeqDiagnostic'; my $sth = $dbh -> prepare ( "SELECT * FROM $target_table WHERE set_id=? AND x=? AND y=?" ); $sth -> execute($set_id, $x, $y); my @result; while (my $row = $sth -> fetchrow_arrayref) { my @splice_probe = @$row; push (@result, \@splice_probe); } return @result; } # }}} #-------------------------------------------------- # mRNA PROBE PAIRS targets {{{ #-------------------------------------------------- sub get_rnadiaprobepair { my ($self, $set_id, $x, $y) = @_; my $chip = $self -> getChipName(); my $dbh = $self -> getDBhandle(); my $target_table = $chip.'mRNADiagnostic'; my $sth = $dbh -> prepare ( "SELECT * FROM $target_table WHERE set_id=? AND x=? AND y=?" ); $sth -> execute($set_id, $x, $y); my @result; while (my $row = $sth -> fetchrow_arrayref) { my @splice_probe = @$row; push (@result, \@splice_probe); } return @result; } # }}} # }}} # }}} # affy {{{ sub _parse_affy { my($self) = @_; # # this methods handle the parsing of a affy_table # # get db handle my $dbh = $self -> getDBhandle(); # # Take affy table .csv as input # unless (open(AFFIFH, $self -> get_affyfile)) { croak ("Can't open file ". $self -> get_affyfile(),"\nError: $!"); } # # Database structure already exist, # I will put select/insert handler for dump data # # CREATE COMMON TABLE GENECHIPARRAY IF IT DOESN'T EXIST {{{ my $create_array_table = $dbh -> prepare ( "CREATE TABLE IF NOT EXISTS genechip_array ( chipcode char(100) primary key, genechip_name char(100) not null, organism char(100) not null, annotation_date date not null );" ); $create_array_table -> execute(); # # GENECHIP_ARRAY TABLE handler # my $genechip_select = $dbh -> prepare ('SELECT genechip_name FROM genechip_array WHERE genechip_name=?'); my $genechip_insert = $dbh -> prepare ('INSERT genechip_array (chipcode, genechip_name, organism, annotation_date) VALUES (?,?,?,?)'); # }}} # # PROBE TABLE handler {{{ # my $probeset_table_name = $self -> getChipName(); # N.B: mySQL doesn't like names with '-' minus inside so substitute with underscore # I put this inside getChipName method #-------------------------------------------------- # $probeset_table_name =~ s/-/_/gm; #-------------------------------------------------- # ------------------------------------------------------------------------------------------ # # get info about tables in the database # # HANDLER per la costruzione di una tabella di probe per ogni chip # # ------------------------------------------------------------------------------------------ # my $table_info = $dbh -> table_info; $table_info -> execute(); while (my @row = $table_info -> fetchrow_array ) { if ($row[2] eq $probeset_table_name) { $table_info -> finish(); carp ("Table $probeset_table_name already exists\n"); return 0; } } # if I stay out of this loop, probe table doesn't exist yet my $create = $dbh -> prepare ( "CREATE TABLE $probeset_table_name ( set_id char(100) primary key, genechip char(100) not null, public_id char(100) not null, description longblob, cluster longblob, assignments longblob, notes longblob )" ); $create -> execute(); # # PROBE TABLE HANDLER # my $probe_select = $dbh -> prepare ("SELECT set_id FROM $probeset_table_name WHERE set_id=?"); my $probe_insert = $dbh -> prepare ("INSERT $probeset_table_name (set_id,genechip,public_id, description, cluster, assignments, notes) VALUES (?,?,?,?,?,?,?)"); my $search_probes = $dbh -> prepare ("SELECT set_id FROM $probeset_table_name WHERE public_id=?"); # }}} # # DESIGN table for chip named ($probeset_table_name).design {{{ # my $design_table_name = $probeset_table_name.'design'; while (my @row = $table_info -> fetchrow_array ) { if ($row[2] eq $design_table_name) { $table_info -> finish(); carp ("Table $design_table_name already exists\n"); return 0; } } $create = $dbh -> prepare ( "CREATE TABLE $design_table_name ( public_id char(100) primary key, seq_type char(200) not null, seq_source char(200) not null, target_des longblob null, arch_unigene char(100) not null, trans_id char(100) not null )" ); $create -> execute(); # # DESIGN table HANDLER # my $design_select = $dbh -> prepare ("SELECT public_id FROM $design_table_name WHERE public_id=?"); my $design_insert = $dbh -> prepare ("INSERT $design_table_name (public_id, seq_type, seq_source, target_des, arch_unigene, trans_id) VALUES (?,?,?,?,?,?)" ); # }}} # # AFFYINFO TABLE HANDLER {{{ # # UPDATE removed overlaps column my $affyinfo_tablename = $probeset_table_name.'affyinfo'; while (my @row = $table_info -> fetchrow_array ) { if ($row[2] eq $affyinfo_tablename) { $table_info -> finish(); carp ("Table $affyinfo_tablename already exists\n"); return 0; } } $create = $dbh -> prepare ( "CREATE TABLE $affyinfo_tablename ( set_id char(100) primary key, genome_version char(255) not null, alignments longblob )" ); $create -> execute(); my $affyinfo_select = $dbh -> prepare ("SELECT set_id FROM $affyinfo_tablename WHERE set_id=?"); my $affyinfo_insert = $dbh -> prepare ("INSERT $affyinfo_tablename (set_id, genome_version, alignments) VALUES (?,?,?)" ); # }}} # # AFFYNOTE TABLE HANDLER {{{ # # UPDATE added column Unigene cluster type # added Annotation Cluster, Description, Assignments, Notes my $affynote_tablename = $probeset_table_name.'affynote'; while (my @row = $table_info -> fetchrow_array ) { if ($row[2] eq $affynote_tablename) { $table_info -> finish(); carp ("Table $affynote_tablename already exists\n"); return 0; } } $create = $dbh -> prepare ( "CREATE TABLE $affynote_tablename ( public_id char(100) primary key, gene_symbol char(100), gene_title char(255), chr_location char(255), unigene char(255), unigene_type char(255), ensembl char(255), locuslink char(255), swissprot char(255), ec char(255), omim char(255), refseq_prot char(255), refseq_tran char(255), flybase char(255), agi char(255), wormbase char(255), mgi char(255), rgd char(255), sgd char(255) )" ); $create -> execute(); my $affynote_select = $dbh -> prepare ("SELECT public_id FROM $affynote_tablename WHERE public_id=?"); my $affynote_insert = $dbh -> prepare ("INSERT $affynote_tablename (public_id, gene_symbol, gene_title, chr_location, unigene, unigene_type, ensembl, locuslink, swissprot, ec, omim, refseq_prot, refseq_tran, flybase, agi, wormbase, mgi, rgd, sgd) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)" ); # }}} # # AFFYFUNC TABLE HANDLER {{{ # my $affyfunc_tablename = $probeset_table_name.'affyfunc'; while (my @row = $table_info -> fetchrow_array ) { if ($row[2] eq $affyfunc_tablename) { $table_info -> finish(); carp ("Table $affyfunc_tablename already exists\n"); return 0; } } $create = $dbh -> prepare ( "CREATE TABLE $affyfunc_tablename ( public_id char(100) primary key, go_bio longblob, go_cell longblob, go_mol longblob, pathway longblob, prot_fam longblob, prot_dom longblob, interpro longblob, membrane longblob, qtl longblob )" ); $create -> execute(); my $affyfunc_select = $dbh -> prepare ("SELECT public_id FROM $affyfunc_tablename WHERE public_id=?"); my $affyfunc_insert = $dbh -> prepare ("INSERT $affyfunc_tablename (public_id, go_bio, go_cell, go_mol, pathway, prot_fam, prot_dom, interpro, membrane, qtl) VALUES (?,?,?,?,?,?,?,?,?,?)" ); # }}} # Parsing del file affy {{{ my $line; my @values; my $count=0; my $duplicate_probe=0; my $duplicate_chip=0; my $duplicate_design=0; my $duplicate_infos=0; my $duplicate_note=0; my $duplicate_func=0; my %designs_x; # # this loop take control of the file. The first line (HEADERS) is putted in the array @keys and is final value # every new line with values is putted in the array @values that is a dynamic array that change at every loop # I will place mySQL insert subroutine inside the loop else parse record. # while ($line = ) { chomp $line; if ($line=~/^\s$/) { # jump blank lines next; } elsif ($count == 0) { # parse _check_headers my @keys = _parse_csv($line); my $check = $self -> _check_headers(@keys); if ($check == 0) { die "Exit!"; } } else { # parse records @values = _parse_csv($line); # # store the values in a mySQL database # # FIRST make a check to verify that all columns are at standard position # # ANALYSIS: the values ar an array where the first element $values[0] is always the probe_id # first check if exist the probe_id into the mySQL probe table # # NOTE: Updated to Affymetrix Tables version Dec 2004 #-------------------------------------------------- # This column has been deleted by affymetrix.... # my $overlaps = $values[12]; # # This column has been added by affymetrix # my $cluster_type = $values[16]; # my $note_desc = $values[39]; # my $note_cluster = $values[40]; # my $note_notes = $values[42]; # my $trans_assign = $values[41]; my $row; my $set_id = $values[0]; my $genechip = $values[1]; my $organism = $values[2]; my $date = $values[3]; my $seq_type = $values[4]; my $seq_src = $values[5]; my $trans_id = $values[6]; my $target_des = $values[7]; my $public_id = $values[8]; my $arch_uni = $values[9]; my $unigene = $values[10]; my $genome_version = $values[11]; my $alignments = $values[12]; my $gene_title = $values[13]; my $gene_symbol = $values[14]; my $chr_location = $values[15]; my $cluster_type = $values[16]; my $ensembl = $values[17]; my $locuslink = $values[18]; my $swissprot = $values[19]; my $ec = $values[20]; my $omim = $values[21]; my $refseq_prot = $values[22]; my $refseq_tran = $values[23]; my $flybase = $values[24]; my $agi = $values[25]; my $wormbase = $values[26]; my $mgi = $values[27]; my $rgd = $values[28]; my $sgd = $values[29]; my $go_bio = $values[30]; my $go_cell = $values[31]; my $go_mol = $values[32]; my $pathway = $values[33]; my $prot_fam = $values[34]; my $prot_dom = $values[35]; my $interpro = $values[36]; my $membrane = $values[37]; my $qtl = $values[38]; my $note_desc = $values[39]; my $note_cluster = $values[40]; my $trans_assign = $values[41]; my $note_notes = $values[42]; # PROBE table update ONE TABLE FOR CHIP $probe_select -> execute($set_id); if ($row = $probe_select -> fetchrow_arrayref) { $duplicate_probe++; } else { # there isn't the probe into the mySQL table so insert it (set_id, genechip , public_id) $probe_insert -> execute($set_id,$genechip,$public_id, $note_desc, $note_cluster, $trans_assign, $note_notes); } # GENECHIP_ARRAY TABLE UPDATE $genechip_select -> execute($genechip); if ($row = $genechip_select -> fetchrow_arrayref) { $duplicate_chip++; } else { # unless exist the chip insert the chip into the mySQL database # var $probeset_table_name is eq to chipcode $date = _parse_date($date); $genechip_insert -> execute ($probeset_table_name,$genechip,$organism,$date); } # DESIGN TABLE UPDATE $design_select -> execute($public_id); if ($row = $design_select -> fetchrow_arrayref) { $duplicate_design++; #-------------------------------------------------- # create a new hash value with key $$row[0] <-- that is public_id #-------------------------------------------------- $designs_x{$$row[0]} = []; } else { $design_insert -> execute ($public_id,$seq_type,$seq_src,$target_des,$arch_uni,$trans_id); } # AFFYINFO TABLE UPDATE # Updated: removed overlaps $affyinfo_select -> execute ($set_id); if ($row = $affyinfo_select -> fetchrow_arrayref) { $duplicate_infos++; } else { $affyinfo_insert -> execute ($set_id,$genome_version,$alignments); } # AFFYNOTE TABLE UPDATE # updated: added cluster type $affynote_select -> execute ($public_id); if ($row = $affynote_select -> fetchrow_arrayref) { $duplicate_note++; } else { $affynote_insert -> execute ($public_id, $gene_symbol, $gene_title, $chr_location, $unigene, $cluster_type, $ensembl, $locuslink, $swissprot, $ec, $omim, $refseq_prot, $refseq_tran, $flybase, $agi, $wormbase, $mgi, $rgd, $sgd); } # AFFYFUNC TABLE UPDATE $affyfunc_select -> execute($public_id); if ($row = $affyfunc_select -> fetchrow_arrayref) { $duplicate_func++; } else { $affyfunc_insert -> execute ($public_id,$go_bio,$go_cell,$go_mol,$pathway,$prot_fam,$prot_dom,$interpro,$membrane,$qtl); } } $count++; } # }}} # create INDEX {{{ my $design_index = $dbh -> prepare ( "CREATE INDEX design_index ON $design_table_name (public_id)" ); $design_index -> execute(); my $info_index = $dbh -> prepare ( "CREATE INDEX affy_info ON $affyinfo_tablename (set_id)" ); $info_index -> execute(); my $note_index = $dbh -> prepare ( "CREATE INDEX affynote ON $affynote_tablename (public_id)" ); $note_index -> execute(); my $note2_index = $dbh -> prepare ( "CREATE INDEX affyrefseq ON $affynote_tablename (refseq_tran)" ); $note2_index -> execute(); my $func_index = $dbh -> prepare ( "CREATE INDEX affyfunc ON $affyfunc_tablename (public_id)" ); $func_index -> execute(); my $probeset_index = $dbh -> prepare ( "CREATE INDEX probeset ON $probeset_table_name (set_id)" ); $probeset_index -> execute(); my $public_index = $dbh -> prepare ( "CREATE INDEX set2public ON $probeset_table_name (public_id)" ); $public_index -> execute(); print "INDEX FOR GROUP TABLE NAME: $probeset_table_name\n"; print "Index Affy tables for $probeset_table_name complete!\n"; # }}} # # SUMMARY {{{ # # print a little summary # for nmbr of records i subtract the first line (headers) my $summary = $self -> getSummary(); my $record_count = $count - 1; print "\n-----AFFY-SUMMARY----------\n\n$record_count record parsed\n$duplicate_probe probeset duplication avoided\n$duplicate_chip chip duplication avoided\n$duplicate_design design duplication avoided\n$duplicate_infos affy alignments duplication avoided\n$duplicate_note affy notes duplication avoided\n$duplicate_func affy functional annotation duplication avoided\n"; print "\n---------------------------\n"; print "\n\n------------END------------\n\n\n\n"; if ($summary and $summary == 1) { my $report = "report.$$.txt"; open (REPORT, "> $report") or die ("Can't create file $report cause: $!"); print "\nIn the file $report you will find a list of the duplicated public ID.\n"; foreach my $key (keys %designs_x) { $search_probes -> execute($key); while (my @probes_x = $search_probes -> fetchrow_array()) { push (@{ $designs_x{$key} }, @probes_x); } print REPORT "$key: @{$designs_x{$key}}","\n"; } close (REPORT); } close (AFFIFH); return 1; # }}} } # }}} # licr {{{ sub _parse_licr { # handle the parsing of the licr file my ($self) = @_; my $dbh = $self -> getDBhandle; my $chipname = $self -> getChipName; my $probes_table = $chipname.'probes'; my $licr_table_name = $chipname.'licrinfo'; # take licr table as input unless (open(LICRFH, $self -> get_licrfile)) { die("Can't open file ".$self -> get_licrfile(),"\nError: $!"); } # ------------------------------------------------------------------------------------------ # # get info about tables in the database # # HANDLER per la costruzione di una tabella di single probes per ogni chip # # ------------------------------------------------------------------------------------------ # # # $row contain: TABLE_CAT at [0] TABLE_SCHEM at [1] and TABLE_NAME at [2] # #-------------------------------------------------- # TABLE_CAT: Table catalog identifier. This field is NULL (undef) if not applicable to the data source, which is usually the case. This field is empty if not applicable to the table. # TABLE_SCHEM: The name of the schema containing the TABLE_NAME value. This field is NULL (undef) if not applicable to data source, and empty if not applicable to the table. # TABLE_NAME: Name of the table (or view, synonym, etc). #-------------------------------------------------- my $table_info = $dbh -> table_info; $table_info -> execute; while ( my @row = $table_info -> fetchrow_array ) { if ($row[2] eq $probes_table) { $table_info -> finish(); print "Table $probes_table already exists\n"; return 0; } } # so, probes table doesn't exist yet, prepare statement handle with autoincrement for probe_id my $create = $dbh -> prepare ( "CREATE TABLE $probes_table ( probe_id int(11) NOT NULL auto_increment, licr_id char(100), x int(11) NOT NULL default '0', y int(11) NOT NULL default '0', oligo char(255) not null default '', set_id char(100) not null, position char(255), strand char(10), PRIMARY KEY (probe_id))" ); $create -> execute(); #-------------------------------------------------- # Table SWISSINFO store information about licr annotation #-------------------------------------------------- my $create_info = $dbh -> prepare( "CREATE TABLE $licr_table_name ( licr_id char(100) not null PRIMARY KEY, seq_type char(255), gene_symbol char(100), unigene char(100), band char(100) )" ); $create_info -> execute(); # # HANDLERS # # devo aggiungere anche position alla query? (tolta per accellerare l'inserimento) #-------------------------------------------------- # my $probes_select = $dbh -> prepare ("SELECT probe_id FROM $probes_table WHERE set_id=? AND licr_id=? AND x=? AND y=?"); #-------------------------------------------------- my $probes_insert = $dbh -> prepare ("INSERT $probes_table (probe_id,licr_id,x,y,oligo,set_id,position,strand) values (NULL,?,?,?,?,?,?,?)"); my $search_probes = $dbh -> prepare ("SELECT probe_id FROM $probes_table WHERE licr_id=?"); my $licr_select = $dbh -> prepare ("SELECT licr_id FROM $licr_table_name WHERE licr_id=?"); my $licr_insert = $dbh -> prepare ("INSERT $licr_table_name (licr_id, seq_type, gene_symbol, unigene, band) values (?, ?, ?, ?, ?)"); # PARSE LICR {{{ # Parsing del file licr my $line; my $row; my @values; my $count=0; my $duplicate_probe=0; my $probes_orfani=0; my $duplicate_licr=0; my %licr_x; # this loop take control of the file. The first line (HEADERS) is putted in the array @keys and is final value # every new line with values is putted in the array @values that is a dynamic array that change at every loop # I will place mySQL insert subroutine inside the loop else parse record. # # Alcune considerazioni sulle tabelle licr: # Linea, campi separati da ; # Posso trovare una linea che ha solo info relative a probes: 1320_at|X:195|Y:205;CATATCTTGAAGAGATCCAGTCTGT;;;;;;; # # STRUCTURE: set_id | x:n | y:n ; oligo ; licr_id ; seq_type ; (strand) ; position ; unigene ; gene_symbol ; band # No headers while ($line = ) { chomp $line; if($line =~ /^\s$/) { #JUMP blank lines next; } else { # parse semicolon separeted values. @values contain all the values of a single line. @values = _parse_ssv($line); $values[0] =~ m/(.*)\|X:(.*)\|Y:(.*)/g or die "Can't parse this value: $values[0], I will exit"; my $set_id = $1; my $x = $2; my $y = $3; my $oligo = $values[1]; my $licr_id = $values[2]; my $seq_type = $values[3]; my $strand = $values[4]; my $position = $values[5]; my $unigene = $values[6]; my $gene_symbol = $values[7]; my $band = $values[8]; # # a questo punto si pone un problema: Come evitare duplicazioni? Vogliamo evitare duplicazioni? # in linea di principio: un record e' duplicato se TUTTE queste chiavi sono uguali: set_id, licr_id, x and y # siamo d'accordo? # PROBES TABLE UPDATE (tolto l'if per aumentare velocita') #-------------------------------------------------- # $probes_select -> execute($set_id, $licr_id, $x, $y); # if ($row = $probes_select -> fetchrow_arrayref) { # $duplicate_probe++; # } else { # $probes_insert -> execute($licr_id,$x,$y,$oligo,$set_id,$position,$strand); # } #-------------------------------------------------- $probes_insert -> execute($licr_id,$x,$y,$oligo,$set_id,$position,$strand); # # SWISSINFO TABLE UPDATE # N.B.: ci sono entry cosi: 1861_at|X:600|Y:661;TATGGCCGCGAGCTCCGGAGGATGA;;;;;;; # in cui ho solo info relative al probe. Considero il caso in cui $licr_id e' undef # if (defined ($licr_id)) { $licr_select -> execute($licr_id); if ($row = $licr_select -> fetchrow_arrayref) { $duplicate_licr++; #-------------------------------------------------- # create a new hash value with key $$row[0] <-- that is licr_id #-------------------------------------------------- $licr_x{$$row[0]} = []; } else { # if have found some lines with no info from licr.... in tha case into the seq_type attribute i found strand (+) # UPDATED --> _parse_ssv mehd is now up to date $licr_insert -> execute($licr_id, $seq_type, $gene_symbol, $unigene, $band); } } else { $probes_orfani++; } $count++; } } # }}} # create INDEX {{{ my $probes_index = $dbh -> prepare ( "CREATE INDEX probes ON $probes_table (set_id)" ); $probes_index -> execute(); my $probes2_index = $dbh -> prepare ( "CREATE INDEX probes2licr ON $probes_table (licr_id)" ); $probes2_index -> execute(); my $licr_index = $dbh -> prepare ( "CREATE INDEX licr ON $licr_table_name (licr_id)" ); $licr_index -> execute(); my $licr2_index = $dbh -> prepare ( "CREATE INDEX licr2unigene ON $licr_table_name (unigene)" ); $licr2_index -> execute(); print "Index tables Licr complete!\n"; # }}} # # SUMMARY {{{ # my $summary = $self -> getSummary(); print "\n-----LICR-SUMMARY----------\n\n$count record parsed\n$duplicate_probe probe duplication avoided\n\n$duplicate_licr licr_design duplication avoided\n\n$probes_orfani probes without licr information\n"; print "\n---------------------------\n"; print "\n\n------------END------------\n"; if ($summary and $summary == 1) { my $licr_report = "licr_report.$$.txt"; print "\nIn the file $licr_report you will find a list of the duplicated licr ID.\n"; open (SWISSREPORT, "> $licr_report") or die ("Can't create regular file $licr_report cause: $!"); foreach my $key (keys %licr_x) { $search_probes -> execute ($key); while (my @probes_x = $search_probes -> fetchrow_array()) { push (@{ $licr_x{$key} }, @probes_x); } print SWISSREPORT "$key: @{$licr_x{$key}}","\n"; } close (SWISSREPORT); } close (LICRFH); return 1; # }}} } # }}} # licrrna {{{ sub _parse_licrrna { # handle the parsing of the licr file my ($self) = @_; my $dbh = $self -> getDBhandle; my $chipname = $self -> getChipName; my $probes_table = $chipname.'RNAprobes'; my $licr_table_name = $chipname.'RNAlicrinfo'; # take licr table as input unless (open(LICRFH, $self -> get_licrrna)) { die("Can't open file ".$self -> get_licrrna(),"\nError: $!"); } # ------------------------------------------------------------------------------------------ # # get info about tables in the database # # HANDLER per la costruzione di una tabella di single probes per ogni chip # # ------------------------------------------------------------------------------------------ # # # $row contain: TABLE_CAT at [0] TABLE_SCHEM at [1] and TABLE_NAME at [2] # #-------------------------------------------------- # TABLE_CAT: Table catalog identifier. This field is NULL (undef) if not applicable to the data source, which is usually the case. This field is empty if not applicable to the table. # TABLE_SCHEM: The name of the schema containing the TABLE_NAME value. This field is NULL (undef) if not applicable to data source, and empty if not applicable to the table. # TABLE_NAME: Name of the table (or view, synonym, etc). #-------------------------------------------------- my $table_info = $dbh -> table_info; $table_info -> execute; while ( my @row = $table_info -> fetchrow_array ) { if ($row[2] eq $probes_table) { $table_info -> finish(); print "Table $probes_table already exists\n"; return 0; } } # so, probes table doesn't exist yet, prepare statement handle with autoincrement for probe_id my $create = $dbh -> prepare ( "CREATE TABLE $probes_table ( probe_id int(11) NOT NULL auto_increment, licr_id char(100), x int(11) NOT NULL default '0', y int(11) NOT NULL default '0', oligo char(255) not null default '', set_id char(100) not null, position char(255), strand char(10), PRIMARY KEY (probe_id))" ); $create -> execute(); #-------------------------------------------------- # Table SWISSINFO store information about licr annotation #-------------------------------------------------- my $create_info = $dbh -> prepare( "CREATE TABLE $licr_table_name ( licr_id char(100) not null PRIMARY KEY, seq_type char(255), gene_symbol char(100), unigene char(100), band char(100) )" ); $create_info -> execute(); # # HANDLERS # # devo aggiungere anche position alla query? (tolta per accellerare l'inserimento) #-------------------------------------------------- # my $probes_select = $dbh -> prepare ("SELECT probe_id FROM $probes_table WHERE set_id=? AND licr_id=? AND x=? AND y=?"); #-------------------------------------------------- my $probes_insert = $dbh -> prepare ("INSERT $probes_table (probe_id,licr_id,x,y,oligo,set_id,position,strand) values (NULL,?,?,?,?,?,?,?)"); my $search_probes = $dbh -> prepare ("SELECT probe_id FROM $probes_table WHERE licr_id=?"); my $licr_select = $dbh -> prepare ("SELECT licr_id FROM $licr_table_name WHERE licr_id=?"); my $licr_insert = $dbh -> prepare ("INSERT $licr_table_name (licr_id, seq_type, gene_symbol, unigene, band) values (?, ?, ?, ?, ?)"); # Parsing del file licr {{{ my $line; my $row; my @values; my $count=0; my $duplicate_probe=0; my $probes_orfani=0; my $duplicate_licr=0; my %licr_x; # # this loop take control of the file. The first line (HEADERS) is putted in the array @keys and is final value # every new line with values is putted in the array @values that is a dynamic array that change at every loop # I will place mySQL insert subroutine inside the loop else parse record. # # Alcune considerazioni sulle tabelle licr: # Linea, campi separati da ; # Posso trovare una linea che ha solo info relative a probes: 1320_at|X:195|Y:205;CATATCTTGAAGAGATCCAGTCTGT;;;;;;; # # STRUCTURE: set_id | x:n | y:n ; oligo ; licr_id ; seq_type ; (strand) ; position ; unigene ; gene_symbol ; band # No headers while ($line = ) { chomp $line; if($line =~ /^\s$/) { #JUMP blank lines next; } else { # parse semicolon separeted values. @values contain all the values of a single line. @values = _parse_ssv($line); $values[0] =~ m/(.*)\|X:(.*)\|Y:(.*)/g or die "Can't parse this value: $values[0], I will exit"; my $set_id = $1; my $x = $2; my $y = $3; my $oligo = $values[1]; my $licr_id = $values[2]; my $seq_type = $values[3]; my $strand = $values[4]; my $position = $values[5]; my $unigene = $values[6]; my $gene_symbol = $values[7]; my $band = $values[8]; # # a questo punto si pone un problema: Come evitare duplicazioni? Vogliamo evitare duplicazioni? # in linea di principio: un record e' duplicato se TUTTE queste chiavi sono uguali: set_id, licr_id, x and y # siamo d'accordo? # PROBES TABLE UPDATE (tolto l'if per aumentare velocita') #-------------------------------------------------- # $probes_select -> execute($set_id, $licr_id, $x, $y); # if ($row = $probes_select -> fetchrow_arrayref) { # $duplicate_probe++; # } else { # $probes_insert -> execute($licr_id,$x,$y,$oligo,$set_id,$position,$strand); # } #-------------------------------------------------- $probes_insert -> execute($licr_id,$x,$y,$oligo,$set_id,$position,$strand); # # SWISSINFO TABLE UPDATE # N.B.: ci sono entry cosi: 1861_at|X:600|Y:661;TATGGCCGCGAGCTCCGGAGGATGA;;;;;;; # in cui ho solo info relative al probe. Considero il caso in cui $licr_id e' undef # if (defined ($licr_id)) { $licr_select -> execute($licr_id); if ($row = $licr_select -> fetchrow_arrayref) { $duplicate_licr++; #-------------------------------------------------- # create a new hash value with key $$row[0] <-- that is licr_id #-------------------------------------------------- $licr_x{$$row[0]} = []; } else { # if have found some lines with no info from licr.... in tha case into the seq_type attribute i found strand (+) # UPDATED --> _parse_ssv mehd is now up to date $licr_insert -> execute($licr_id, $seq_type, $gene_symbol, $unigene, $band); } } else { $probes_orfani++; } $count++; } } # }}} # create INDEX {{{ my $probes_index = $dbh -> prepare ( "CREATE INDEX probes ON $probes_table (set_id)" ); $probes_index -> execute(); my $probes2_index = $dbh -> prepare ( "CREATE INDEX probes2licr ON $probes_table (licr_id)" ); $probes2_index -> execute(); my $licr_index = $dbh -> prepare ( "CREATE INDEX licr ON $licr_table_name (licr_id)" ); $licr_index -> execute(); my $licr2_index = $dbh -> prepare ( "CREATE INDEX licr2unigene ON $licr_table_name (unigene)" ); $licr2_index -> execute(); print "Index tables Licr complete!\n"; # }}} # # SUMMARY {{{ # my $summary = $self -> getSummary(); print "\n-----RNA-LICR-SUMMARY----------\n\n$count record parsed\n$duplicate_probe probe duplication avoided\n\n$duplicate_licr licr_design duplication avoided\n\n$probes_orfani probes without licr information\n"; print "\n---------------------------\n"; print "\n\n------------END------------\n"; if ($summary and $summary == 1) { my $licr_report = "licr_report.$$.txt"; print "\nIn the file $licr_report you will find a list of the duplicated licr ID.\n"; open (SWISSREPORT, "> $licr_report") or die ("Can't create regular file $licr_report cause: $!"); foreach my $key (keys %licr_x) { $search_probes -> execute ($key); while (my @probes_x = $search_probes -> fetchrow_array()) { push (@{ $licr_x{$key} }, @probes_x); } print SWISSREPORT "$key: @{$licr_x{$key}}","\n"; } close (SWISSREPORT); } close (LICRFH); return 1; # }}} } # }}} # Insert into database Exon map {{{ sub _parse_refseq_exonmap { # Handling RefSeq exon Map my ($self) = @_; my $dbh = $self -> getDBhandle; my $refseqcode = $self -> get_refseqcode(); # take RefSeq table as input unless (open(EXONSFH, $self -> get_refseqfile)) { die("Can't open file ".$self -> get_refseqfile(),"\nError: $!"); } my $table_info = $dbh -> table_info; $table_info -> execute; while ( my @row = $table_info -> fetchrow_array ) { if ($row[2] eq $refseqcode) { $table_info -> finish(); $dbh -> disconnect(); print "Table $refseqcode already exists\n"; return 0; } } # prepare statement handle with autoincrement for RefSeq id my $create = $dbh -> prepare ( "CREATE TABLE $refseqcode ( refseq_id int(11) NOT NULL auto_increment, name char(100), chromosome char(10), strand char(10), txStart int(20) NOT NULL default '0', txStop int(20) not null default '0', cdsStart int(20) NOT NULL default '0', cdsStop int(20) NOT NULL default '0', exonCount int(10) NOT NULL default '0', exonStarts longblob, exonStops longblob, PRIMARY KEY (refseq_id))" ); $create -> execute(); my $refseq_select = $dbh -> prepare ("SELECT refseq_id FROM $refseqcode WHERE name=?"); my $refseq_insert = $dbh -> prepare ("INSERT $refseqcode (name, chromosome,strand, txStart, txStop, cdsStart, cdsStop, exonCount, exonStarts, exonStops) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"); my $count = 0; my $line; my $row; # open file exons {{{ while ($line = ) { chomp $line; ($line=~/^\#/) && next; my ($name,$chrom,$strand,$txStart,$txEnd,$cdsStart,$cdsEnd,$exonCount,$exonStarts,$exonEnds) = split (/\t/,$line); # is possible to find duplicated names (strand - and +) # for now only BLIND insert $refseq_insert -> execute($name,$chrom,$strand,$txStart,$txEnd,$cdsStart,$cdsEnd,$exonCount,$exonStarts,$exonEnds); $count ++; #-------------------------------------------------- # if (defined ($name)) { # $refseq_select -> execute($name); # if ($row = $refseq_select -> fetchrow_arrayref) { # print "Founded a duplicate?$name\n"; # } else { # $refseq_insert -> execute($name,$chrom,$strand,$txStart,$txEnd,$cdsStart,$cdsEnd,$exonCount,$exonStarts,$exonEnds); # $count++; # } # } #-------------------------------------------------- } # }}} # create INDEX on name my $refseq_index = $dbh -> prepare ( "CREATE INDEX refseq_index ON $refseqcode (name)" ); $refseq_index -> execute(); print "Index table RefSeq complete!\n"; # # SUMMARY {{{ # my $summary = $self -> getSummary(); print "\n-----REFSEQ-SUMMARY----------\n\n$count record parsed\n\n"; print "\n---------------------------\n"; print "\n\n------------END------------\n"; close (EXONSFH); return 1; # }}} } # }}} # Insert into database CDNA map {{{ sub _parse_cdna_exonmap { # Handling RefSeq exon Map my ($self) = @_; my $dbh = $self -> getDBhandle; my $cdnacode = $self -> get_cdnacode(); # take RefSeq table as input unless (open(EXONSFH, $self -> get_cdnafile)) { die("Can't open file ".$self -> get_cdnafile(),"\nError: $!"); } my $table_info = $dbh -> table_info; $table_info -> execute; while ( my @row = $table_info -> fetchrow_array ) { if ($row[2] eq $cdnacode) { $table_info -> finish(); $dbh -> disconnect(); print "Table $cdnacode already exists\n"; return 0; } } # prepare statement handle with autoincrement for RefSeq id my $create = $dbh -> prepare ( "CREATE TABLE $cdnacode ( cdna_id int(11) NOT NULL auto_increment, name char(100), chromosome char(10), strand char(10), txStart int(20) NOT NULL default '0', txStop int(20) not null default '0', exonCount int(10) NOT NULL default '0', exonStarts longblob, exonStops longblob, PRIMARY KEY (cdna_id))" ); $create -> execute(); my $cdna_select = $dbh -> prepare ("SELECT cdna_id FROM $cdnacode WHERE name=?"); my $cdna_insert = $dbh -> prepare ("INSERT $cdnacode (name, chromosome, strand, txStart, txStop, exonCount, exonStarts, exonStops) values (?, ?, ?, ?, ?, ?, ?, ?)"); my $count = 0; my $line; my $row; # open file cdna {{{ while ($line = ) { chomp $line; ($line=~/^\#/) && next; my ($bin,$matches,$misMatch,$repMatch,$nCount,$qNumInsert,$qbaseInsert,$tNumInsert,$tBaseInsert,$strand,$name,$qsize,$qstart,$qend,$chrom,$tSize,$txStart,$txEnd,$exonCount,$exonSize,$qstarts,$exonStarts) = split (/\t/,$line); my @exonStarts = split (/,/ , $exonStarts); my @exonSizes = split (/,/ , $exonSize); my @exonStops; # Compute Exons Stops (remeber 0-based philosophy of UCSC) for (my $a=0; $a<=$#exonStarts; $a++) { $exonStarts[$a] += 1; $exonStops[$a] = $exonStarts[$a] + $exonSizes[$a]; } #reconstruct $exonStarts and $exonStops $exonStarts = ''; my $exonEnds = ''; for (my $b=0; $b<=$#exonStarts; $b++) { $exonStarts .= "$exonStarts[$b],"; $exonEnds .= "$exonStops[$b],"; } # ADD +1 to txStart $txStart += 1; # is possible to find duplicated names (strand - and +) # for now only BLIND insert $cdna_insert -> execute($name,$chrom,$strand,$txStart,$txEnd,$exonCount,$exonStarts,$exonEnds); $count ++; } # }}} # create INDEX on name my $cdna_index = $dbh -> prepare ( "CREATE INDEX cdna_index ON $cdnacode (name)" ); $cdna_index -> execute(); print "Index table cDNA complete!\n"; # # SUMMARY {{{ # my $summary = $self -> getSummary(); print "\n-----CDNA-SUMMARY----------\n\n$count record parsed\n\n"; print "\n---------------------------\n"; print "\n\n------------END------------\n"; close (EXONSFH); return 1; # }}} } # }}} # Insert into database RefSeq Diagnostics ProbePairs {{{ sub _parse_probepairs { # GET HANDLERS my ($self) = @_; my $dbh = $self -> getDBhandle; my $chipname = $self -> getChipName; my $probes_table = $chipname.'RefSeqDiagnostic'; # take RefSeq table as input unless (open(SPLICEFH, $self -> get_probepairs)) { die("Can't open file ".$self -> get_probepairs(),"\nError: $!"); } my $table_info = $dbh -> table_info; $table_info -> execute; while ( my @row = $table_info -> fetchrow_array ) { if ($row[2] eq $probes_table) { $table_info -> finish(); $dbh -> disconnect(); print "Table $probes_table already exists\n"; return 0; } } # prepare statement handle with autoincrement for RefSeq id my $create = $dbh -> prepare ( "CREATE TABLE $probes_table ( diagnostic_probe_id int(11) NOT NULL auto_increment, set_id char(100) not null, x int(11) NOT NULL default '0', y int(11) NOT NULL default '0', oligo char(255) not null default '', strand char(10), PRIMARY KEY (diagnostic_probe_id))" ); $create -> execute(); my $probepair_select = $dbh -> prepare ("SELECT diagnostic_probe_id FROM $probes_table WHERE name=?"); my $probepair_insert = $dbh -> prepare ("INSERT $probes_table (set_id, x, y, oligo, strand) values (?, ?, ?, ?, ?)"); my $count = 0; my $line; my $row; # open file PPD {{{ while ($line = ) { chomp $line; ($line=~/^\#/) && next; my ($set_id, $x, $y, $oligo, $strand) = split (/\t/,$line); $probepair_insert -> execute($set_id, $x, $y, $oligo, $strand); $count ++; } # }}} # create INDEX on name my $pp_index = $dbh -> prepare ( "CREATE INDEX probepair_index ON $probes_table (set_id)" ); $pp_index -> execute(); print "Index table ProbePairs complete!\n"; # # SUMMARY {{{ # my $summary = $self -> getSummary(); print "\n-----PROBE PAIRS-SUMMARY----------\n\n$count record parsed\n\n"; print "\n---------------------------\n"; print "\n\n------------END------------\n"; close (SPLICEFH); return 1; # }}} } # }}} # Insert into database mRNA Diagnostics ProbePairs {{{ sub _parse_rnaprobepairs { # GET HANDLERS my ($self) = @_; my $dbh = $self -> getDBhandle; my $chipname = $self -> getChipName; my $probes_table = $chipname.'mRNADiagnostic'; # take RefSeq table as input unless (open(SPLICEFH, $self -> get_rnaprobepairs)) { die("Can't open file ".$self -> get_rnaprobepairs(),"\nError: $!"); } my $table_info = $dbh -> table_info; $table_info -> execute; while ( my @row = $table_info -> fetchrow_array ) { if ($row[2] eq $probes_table) { $table_info -> finish(); $dbh -> disconnect(); print "Table $probes_table already exists\n"; return 0; } } # prepare statement handle with autoincrement for RefSeq id my $create = $dbh -> prepare ( "CREATE TABLE $probes_table ( diagnostic_probe_id int(11) NOT NULL auto_increment, set_id char(100) not null, x int(11) NOT NULL default '0', y int(11) NOT NULL default '0', oligo char(255) not null default '', strand char(10), PRIMARY KEY (diagnostic_probe_id))" ); $create -> execute(); my $probepair_select = $dbh -> prepare ("SELECT diagnostic_probe_id FROM $probes_table WHERE name=?"); my $probepair_insert = $dbh -> prepare ("INSERT $probes_table (set_id, x, y, oligo, strand) values (?, ?, ?, ?, ?)"); my $count = 0; my $line; my $row; # open file exons {{{ while ($line = ) { chomp $line; ($line=~/^\#/) && next; my ($set_id, $x, $y, $oligo, $strand) = split (/\t/,$line); $probepair_insert -> execute($set_id, $x, $y, $oligo, $strand); $count ++; } # }}} # create INDEX on name my $pp_index = $dbh -> prepare ( "CREATE INDEX probepair_index ON $probes_table (set_id)" ); $pp_index -> execute(); print "Index table ProbePairs complete!\n"; # # SUMMARY {{{ # my $summary = $self -> getSummary(); print "\n-----PROBE PAIRS-SUMMARY----------\n\n$count record parsed\n\n"; print "\n---------------------------\n"; print "\n\n------------END------------\n"; close (SPLICEFH); return 1; # }}} } # }}} # Insert ISOFORMS {{{ sub _parse_hisoforms { # GET HANDLERS my ($self) = @_; my $dbh = $self -> getDBhandle; my $refseq_code = $self -> get_refseqcode(); my $isoforms_table = $refseq_code.'isoforms'; # take table as input unless (open(ISOFH, $self -> get_hisoforms)) { die("Can't open file ".$self -> get_hisoforms(),"\nError: $!"); } my $table_info = $dbh -> table_info; $table_info -> execute; while ( my @row = $table_info -> fetchrow_array ) { if ($row[2] eq $isoforms_table) { $table_info -> finish(); $dbh -> disconnect(); print "Table $isoforms_table already exists\n"; return 0; } } # prepare statement handle with autoincrement for RefSeq id my $create = $dbh -> prepare ( "CREATE TABLE $isoforms_table ( isoform_id int(11) NOT NULL auto_increment, refseq_id char(100) not null, gene char(255), unigene char(255), cytoband char(255), description longblob, PRIMARY KEY (isoform_id))" ); $create -> execute(); my $isoform_select = $dbh -> prepare ("SELECT isoform_id FROM $isoforms_table WHERE name=?"); my $isoform_insert = $dbh -> prepare ("INSERT $isoforms_table (refseq_id, gene, unigene, cytoband, description) values (?, ?, ?, ?, ?)"); my $count = 0; my $line; my $row; # open file isoforms {{{ while ($line = ) { chomp $line; ($line=~/^\#/) && next; my ($refseq_id, $gene, $unigene, $cytoband, $description) = split (/\t/,$line); $isoform_insert -> execute($refseq_id, $gene, $unigene, $cytoband, $description); $count ++; } # }}} # create INDEX on name my $pp_index = $dbh -> prepare ( "CREATE INDEX isoform_index ON $isoforms_table (refseq_id)" ); $pp_index -> execute(); print "Index table Isoforms complete!\n"; # # SUMMARY {{{ # my $summary = $self -> getSummary(); print "\n--------IFOFORMS-SUMMARY----------\n\n$count record parsed\n\n"; print "\n---------------------------\n"; print "\n\n------------END------------\n"; close (ISOFH); return 1; # }}} } # }}} # parse_ssv {{{ #-------------------------------------------------- # PARSE SSV UPDATED to same RegExp of _parse_csv #-------------------------------------------------- sub _parse_ssv { my $text = shift; my @new = (); #-------------------------------------------------- # REGEXP ($+ is think positive and look forward) #-------------------------------------------------- push (@new, $+) while $text =~ m { # match a pattern ( # match $1 [^\"\\] # $1 DON'T match \" (escaped quotes) or backslash \ * # any character 0 or more times (?:\\. # a group defined by (?:pattern) where pattern is backslash and any char [^\"\\] # except \" or \ .... any char is (.*) *)* # any char )" # end $1 ;? # a semicolon and 0 or 1 time | # OR ([^;]+);? # a $1 with 1 or more char NOT semicolon | # OR ; # a semicolon }gx; # /g for global # /x for comment inside regexp push(@new,undef) if substr($text,-1,1) eq ';'; return @new; } # }}} # parse_csv {{{ #-------------------------------------------------- # PARSE CSV #-------------------------------------------------- sub _parse_csv { my $text = shift; my @new = (); # # mnemonic $+ is be positive and loof forward: ritorna il prossimo match # this sub take a vule at $+ and push it into the array if the text match with regexp # @new contains all the values from ths initial csv file for 1 line. # # ----------------------------------------------------------------------------------- # REGEXP # ----------------------------------------------------------------------------------- push (@new, $+) while $text =~ m { # match a pattern "( # match quotes "($1)" with inside: $1 [^\"\\] # $1 DON'T match \" (escaped quotes) or backslash \ * # any character 0 or more times (?:\\. # a group defined by (?:pattern) where pattern is backslash and any char [^\"\\] # except \" or \ .... any char is (.*) *)* # any char )" # end $1 ,? # a comma and 0 or 1 time | # OR ([^,]+),? # a $1 with 1 or more char NOT comma (,) and a comma 0-1 times | # OR , # a comma }gx; # /g for global # /x for comment inside regexp push (@new, undef) if substr($text, -1,1) eq ','; # push value in @new se l'ultimo carattere e' una virgola return @new; # list of values that were comma-separated } # }}} # parse_date {{{ #-------------------------------------------------- # PARSE DATE #-------------------------------------------------- sub _parse_date { # convert dates between affy format (Jun 23,2004) and mySQL format (2004-06-23) my $date = $_[0]; my(%date_converter) = ( 'Jan' => '01', 'Feb' => '02', 'Mar' => '03', 'Apr' => '04', 'May' => '05', 'Jun' => '06', 'Jul' => '07', 'Aug' => '08', 'Sep' => '09', 'Oct' => '10', 'Nov' => '11', 'Dec' => '12' ); $date =~ /(.*) (\d*), (\d*)/; my $month = $1; my $day = $2; my $year = $3; # check the hash table for months if (exists $date_converter{$month}) { $month = $date_converter{$month}; } else { die "Bad date at $!"; } my $separetor = '-'; my $new_date = $year.$separetor.$month.$separetor.$day; return ($new_date); } # }}} # headers UPDATED to version 23 Mar 2005 with header change {{{ { my @headers = ( "Probe Set ID", "GeneChip Array", "Species Scientific Name", "Annotation Date", "Sequence Type", "Sequence Source", "Transcript ID(Array Design)", "Target Description", "Representative Public ID", "Archival UniGene Cluster", "UniGene ID", "Genome Version", "Alignments", "Gene Title", "Gene Symbol", "Chromosomal Location", "Unigene Cluster Type", "Ensembl", "Entrez Gene", "SwissProt", "EC", "OMIM", "RefSeq Protein ID", "RefSeq Transcript ID", "FlyBase", "AGI", "WormBase", "MGI Name", "RGD Name", "SGD accession number", "Gene Ontology Biological Process", "Gene Ontology Cellular Component", "Gene Ontology Molecular Function", "Pathway", "Protein Families", "Protein Domains", "InterPro", "Trans Membrane", "QTL", "Annotation Description", "Annotation Transcript Cluster", "Transcript Assignments", "Annotation Notes" ); sub _check_headers { # This subroutine take trace of the affy table structure and verifiy any change in column position # check if headers strucuture is the same of @headers or are changed. If are changed return 0 else return 1. my ($self,@keys) = @_; for (my $i = 0; $i <= $#headers; $i++) { if ($headers[$i] eq $keys[$i]) { next; } else { my $affy_file = $self -> get_affyfile(); print "Seems that the affy table structure are changed! This module can't handle change in the data position but you can manually adjust small errors.\n"; print "\n -->$keys[$i]<-- IS NOT EQUAL TO -->$headers[$i]<--\n\n"; print "If the column mantain the same data but headers are differents (ES: \"MGI name\" and \"MGIname\")\nyou can open $affy_file and change:"; print "\n\n-->$keys[$i]\nTO\n-->$headers[$i]\n\n"; print "I will drop the new tables so you can restart the script after manual modification of the csv file!\n\n"; my $probeset_table_name = $self -> getChipName(); my $design_table_name = $probeset_table_name.'design'; my $funcion_table = $probeset_table_name.'affyfunc'; my $info_table = $probeset_table_name.'affyinfo'; my $note_table = $probeset_table_name.'affynote'; my $dbh = $self -> getDBhandle(); my $drop = $dbh -> prepare ( "DROP TABLE $probeset_table_name, $design_table_name, $info_table, $note_table, $funcion_table" ); $drop -> execute(); $dbh -> disconnect(); return 0; } } return 1; } sub get_headers { # GET headers: external method for take headers my ($self) = @_; return @headers; } } #end of header block # }}} # generate new tables {{{ sub target_table { my ($self) = @_; my $chip = $self -> getChipName(); my $dbh = $self -> getDBhandle(); my $probes_table = $chip.'probes'; my $target_table = $chip.'targets'; # check if table targets exist my $table_info = $dbh -> table_info; $table_info -> execute(); while (my @row = $table_info -> fetchrow_array ) { if ($row[2] eq $target_table) { $table_info -> finish(); print ("Table $target_table already exists\n"); return 0; } } # table doesn't exist yet my $create = $dbh -> prepare ( "CREATE TABLE $target_table ( set_id char(100) PRIMARY KEY, target_number int(4) )" ); $create -> execute(); # # TARGETS TABLE HANDLER # # This query generate data entry in this format: PROBESET - NUMBER_OF_TARGETS my $target_select = $dbh -> prepare ("SELECT set_id, count(DISTINCT licr_id) AS target_number from $probes_table GROUP BY set_id ORDER BY target_number DESC;"); my $target_insert = $dbh -> prepare ("INSERT $target_table (set_id,target_number) VALUES (?,?)"); my $target_check = $dbh -> prepare ("SELECT set_id FROM $target_table WHERE set_id=?"); $target_select -> execute(); # Take result and insert in the new table my $duplicates = 0; my $count = 0; while (my @result = $target_select -> fetchrow_array) { $target_check -> execute ($result[0]); if (my $row = $target_check -> fetchrow_arrayref) { $duplicates++; } else { $count++; $target_insert -> execute ($result[0],$result[1]); } } # create INDEX on name my $targets_index = $dbh -> prepare ( "CREATE INDEX targets ON $target_table (set_id)" ); $targets_index -> execute(); print "Index table Targets complete!\n"; # # SUMMARY # # print a little summary # for nmbr of records i subtract the first line (headers) print "\n-----TARGETS-SUMMARY----------\n\n$count probeset parsed\n$duplicates probeset duplication avoided\n"; print "\n---------------------------\n"; return 1; } # }}} 1; __END__ # perldoc {{{ =head1 NAME AffyDB - An interface for insert and query affymetrix, licr annotation tables and UCSC exons maps into a Entity Relationship mySQL database. =head1 SYNOPSIS # INSERT TABLES # AffyDB scripts that create a new set of Tables from affymetrix file annot_csv, # create licr information tables starting for Licr file, make index for speed up query, # generate a Report and disconnect from mySQL ... use AffyDB; my $affytable = AffyDB -> new ( mysql => 'affy:localhost', user => 'foo', password => 'bar', affyfile => 'Mouse430_2_annot.csv', licrfile => 'Mouse430_2.RefSeq', chip => 'Mouse430_2', index => "1", summary => "1" ); $affytable -> affy_disconnect(); # ADD EXON MAP add a UCSC map of the HumanRefSeq. RefSeq UCSC map is a TAB-SEPARETED FILE use AffyDB; my $affytable = AffyDB -> new ( mysql => 'affy:localhost', user => 'foo', password => 'bar', refseqfile => 'RefSeqExons.txt', refseqcode => 'HumanRefSeq1', chip => 'test' ); $affytable -> affy_disconnect(); # RETRIVE INFORMATION # GET'S methods used to retrive information from mySQL tables # Retrive probeset informatoion: use AffyDB; my $affytable = AffyDB -> new ( mysql => 'affy:localhost', user => 'foo', password => 'bar', ) # get single probeset INFO my $probe = '1415670_at'; my @probe = $affydb -> get_probeset($probe); my $chip = 'Mouse Genome 430 2.0 Array'; print "Probe is @probe\n"; # get all probesets of a chip my @probes = $affydb -> get_all_probeset($chip); print join ("\n" , @probes); # GENERATE NEW TABLES # generate a table with number of targets for probeset use AffyDB; my $affytable = AffyDB -> new ( mysql => 'affy:localhost', user => 'foo', password => 'bar', chip => 'HG_U133A' ) $affytable -> target_table(); # DISCONNECT $affytable -> affy_disconnect(); =head1 ABSTRACT This perl library uses perl5 objects to make it easy to create and query a mySQL improved version of Affymetrix annotation tables. This package defines AffyDB objects, attributes and arguments. Using a AffyDB object's methods, you can insert tables from a file, generate new tables and retrive data. Using this module with Probeset.pm analysis module is possible to generate original data about single probes position on the genome. AffyDB.pm provides a simple object-oriented interface to mySQL tables. The current version of AffyDB.pm is available at http://bio.ifom-firc.it/AffyDB/src/ ftp://bio.ifom-firc.it/AffyDB/src/ =head1 DESCRIPTION =head2 CONSTRUCTOR AND INITIALIZATION =over =item B use AffyDB; print $VERSION; Display current Module Version =item B my $affytable = AffyDB -> new ( mysql => 'affy:localhost', user => 'foo', password => 'bar', affyfile => 'Mouse430_2_annot.csv', chip => 'Mouse430_2', summary => "1", index => "1" ); If you pass to the module an affyfile (format is csv: comma separeted values), this file will be inserted into the database. With summary you can ask for an extensive summary of the insertion and of the duplication avoided. With index we say to AffyDB.pm to generate index on tables (speed up queries). =item B my $licrtable = AffyDB -> new ( mysql => 'affy:localhost', user => 'foo', password => 'bar', licrfile => 'HG-U133A.RefSeq', chip => 'HG-U133A', summary => "1", index => "1" ); As for affy annotation tables, if you give a licr file as argument (format is ssv: semicolon separeted values), the file will be inserted into the database. =item B my $affydb = AffyDB -> new ( mysql => 'affy:localhost', user => 'foo', password => 'bar', chip => 'HG_U133A' ); Creation of a new connection without insertion of a new file (for retrive queries relatives to chipcode: HG_U133A). Note: mySQL don't like minus (-) in the table names. For this getChipName method switch (-) to (_). =item B my $affydb = AffyDB -> new ( mysql => 'affy:localhost', user => 'foo', password => 'bar', chip => 'test', index => '1' ); In this mode, mySQL index for speed up are created: =over =item "CREATE INDEX design_index ON design_table (public_id)" =item "CREATE INDEX probes ON probes_table (set_id)" =item "CREATE INDEX probes2licr ON probes_table (licr_id)" =item "CREATE INDEX affyinfo ON affyinfo_table (set_id)" =item "CREATE INDEX affynote ON affynote_table (public_id)" =item "CREATE INDEX affyrefseq ON affynote_table (refseq_tran)" =item "CREATE INDEX affyfunc ON affyfunc_table (public_id)" =item "CREATE INDEX licr ON licrinfo_table (licr_id)" =item "CREATE INDEX licr2unigene ON licrinfo_table (unigene)" =item "CREATE INDEX probeset ON main_table (set_id)" =item "CREATE INDEX set2public_id ON main_table (public_id)" =back =item B my $affydb = AffyDB -> new ( mysql => 'affy:localhost', user => 'foo', password => 'bar', chip => 'test' ); $affydb -> target_table(); $affydb -> affy_disconnect(); =back =head2 ATTRIBUTES-ARGUMENTS AND OTHER INFOS =over =item B Mysql is the name and host of the mySQL database in format: 'database:host' =item B and B mySQL username and password to connect to database =item B (database handler) Current databade handler. Is set with the getDBhandle method. =item B F is the affy annotation table as csv file (comma separeted values). If give this attribute to the module parse the file and try to insert information into the mySQL database. The subroutine check headers check table headers integrity and exit if find some differences. =item B F is the licr annotation file as ssv file (semicolon separeted values). If give this attribute to the module parse the file and try to insert information into the mySQL database. =item B F is the UCSC exon map as TAB-separeted file. If give this attribute to the module parse the file and try to insert information into the mySQL database. =item B NOTE: into the mySQL tables genechip name are LONG (Mouse Genome 430 2.0 Array) while THIS chipcode affect the name of the probe and design tables for that specific genechip. CONVENTION: as chipcode you should use the name of the file without suffix: "_annot.csv". EXAMPLE: table => 'MOE430A_annot.csv', chip => 'MOE430A' Cause mySQL doesn't like '-' into the table name, The module will substitute '-' with '_' (underscore). So chip names like HG-U133A are changed to HG_U133A. =item B Use refseqcode to call Exon map table (as chipcode). =item B (1 or 0) If you want an extensive summary of database creation and information about duplicates into the csv table put 1 here. =item B Set this argument to 1 if you want to make some index to accellerate SQL queries (BETTER) =back =head2 CLASS AND OBJECT METHODS =over =item B Return the current dbh (DBI database handler) =item B Set the current DBI handler =item B Return file name of the affymetric annotation table =item B Return file name of the Licr annotation table =item B Return file name of the UCSC exon map =item B Return the RefSeq code for table creation and queries. =item B Get the chipcode, if chipcode contain '-' (minus) substitute with '_' (underscore) =item B Check if an extensive summary are asked from the user =item B Disconnect from the database ($dbh -> disconnect of the DBi module) use always this method to disconnect from AffyDB mySQL DB =item B Check if creations of index for this chip is required, used to call method create_index =head2 GET METHODS =over =item B Prepare and execute a query from the script caller (SQL syntax) Es: my $query = ; chomp $query; my @result = $affydb -> freequery($query); print join ("\n", @result); =item B Prepare and execute a query to retrive probeset_id ($array[0]), genechip ($array[1]) and relative affymetrix public_id ($array[2]) and probeset description ($array[3]), cluster ($array[4]), assignments ($array[5]), notes ($array[6]) starting from a probeset id (Es: 1007_s_at) Es: my $probe = '1415670_at'; my @probe = $affydb -> get_probeset($probe); print "Probe is @probe\n"; =item B Prepare and execute a query to retrive ONLY the probesets IDs that match for a specific ACC (public_id of the affymetrix annotation tables) Es: my $public_id = 'NM_013477'; my @probes = $affydb -> get_matching_probeset ($public_id); print join ("\n", @probes); =item B Prepare and execute a query to retrive all probesets IDs (ES: 1007_s_at) starting from a chip name. Es: my $chip = 'Mouse Genome 430 2.0 Array'; my @probes = $affydb -> get_all_probeset($chip); print join ("\n" , @probes); =item B Prepare and execute a query to retrive design affymetrix information of a specific probeset. $info[0] = public_id $info[1] = seq_type $info[2] = seq_source $info[3] = target_des $info[4] = arch_unigene $info[5] = trans_id $info[6] = description $info[7] = cluster $info[8] = assignments $info[9] = notes Es: my @info = $affydb -> get_probeset_design ($probe); print join ("\n",@info); =item B Prepare and execute a query to retrive chip information starting from the chipcode. $info[0] = chipcode $info[1] = genechip_name $info[2] = organism $info[3] = annotation_date Es: my @info = $affydb -> get_chip ($chip); print "GENECHIP info are:\n"; print join ("\n",@info); =item B Prepare and execute a query to retrive ALL DISTINCT probes of a specific probeset. (Relation One to Many) Format String: PROBE_ID: SET_ID | X:N | Y:M | ACGTGCGTGTGTGTACGCGCGAA You will retrive an array that contains a list of rows with this format. =item B Prepare and execute a query to retrive ALL DISTINCT probes of a specific probeset. (Relation One to Many) Format HTML: (X,Y) ACGCGCGTGCAGCAGCGCAGCATGACGA" =item B Prepare and execute a query to retrive ALL probes of a specific probeset. (WITH REDUNDANCY) Format String: PROBE_ID: SET_ID | X:N | Y:M | ACGTGCGTGTGTGTACGCGCGAA You will retrive an array that contains a list of rows with this format. =item B Prepare and execute a query to retrieve ALL matching locations for a specific Probeset (One to many). Location are pushed ito an array of array: '_locations' => [ [ '1007_s_at', '1', 'NM_001954', '[3678..3702]3840', '(+)' ], ] =item B Prepare and execute a query to retrive licr information about a list of single probes. $info[0] = probe_id $info[1] = licr_id $info[2] = x $info[3] = y $info[4] = oligo $info[5] = set_id $info[6] = position $info[7] = strand =item B Prepare and execute a query to retrive ALL distinct RefSeq that match for this probeset (licr tables). Information are pushed in a array of array as locations. =item B Prepare and execute a query to retrive ALL distinct RefSeq that match for this probeset (licr tables). Information are stored in a HTML table. CSS table class: licr td class: head and value =item B< get_affy_alignments> Prepare and execute a query to retrive Alignments information (affymetrix tables). FORMAT STRING: GENOME VERSION: @$row[1] | ALIGNMENTS: @$row[2] =item B< get_affy_alignments_html> Prepare and execute a query to retrive Alignments information (affymetrix tables). FORMAT STRING: "GENOME VERSION@$row[1]ALIGNMENTS@$row[2]" =item B Prepare and execute a query to retrive Affymetrix annotation. FORMAT STRING: PUBLIC ID: @$row[0] | GENE_SYMBOL: @$row[1] | GENE_TITLE: @$row[2] | CHR_LOCATION: @$row[3] | UNIGENE: @$row[4] | ENSEMBL: @$row[5] | LOCUSLINK: @$row[6] | SWISSPROT: @$row[7] | EC: @$row[8] | OMIM: @$row[9] | REFSEQ_PROT: @$row[10] | REFSEQ_TRAN: @$row[11] | FLYBASE: @$row[12] | AGI: @$row[13] | WORMBASE: @$row[14] | MGI: @$row[15] | RGD: @$row[16] | SGD: @$row[17] =item B Prepare and execute a query to retrive Affymetrix annotation. FORMAT STRING: PUBLIC ID@$row[0] GENE_SYMBOL@$row[1] GENE_TITLE@$row[2] CHR_LOCATION@$row[3] UNIGENE@$row[4] UNIGENE TYPE@$row[5] ENSEMBL@$row[6] LOCUSLINK@$row[7] SWISSPROT@$row[8] EC@$row[9] OMIM@$row[10] REFSEQ_PROT@$row[11] REFSEQ_TRAN@$row[12] FLYBASE@$row[13] AGI@$row[14] WORMBASE@$row[15] MGI@$row[16] RGD@$row[17] SGD@$row[18] =item B Prepare and execute a query to retrive Affymetrix functional annotation. FORMAT STRING: PUBLIC ID: @$row[0] | GO BIO: @$row[1] | GO CELL: @$row[2] | GO MOL: @$row[3] | PATHWAY: @$row[4] | PROT FAM: @$row[5] | PROT DOM: @$row[6] | INTERPRO: @$row[7] | MEMBRANE: @$row[8] | QTL: @$row[9] =item B Prepare and execute a query to retrive Affymetrix functional annotation. FORMAT STRING: PUBLIC ID@$row[0] GO BIO@$row[1] GO CELL@$row[2] GO MOL@$row[3] PATHWAY@$row[4] PROT FAM@$row[5] PROT DOM@$row[6] INTERPRO@$row[7] MEMBRANE@$row[8] QTL@$row[9]"; =item B Prepare and execute a query to retrive specific map for a RefSeq code (ACC). As locations, refseq_map is an array of array. =item B Take the headers (first line) of the affymetrix annotation table. Es: my @headers = $affydb -> get_headers(); print join ("\n", @headers); =back =head1 BUGS Please report them! =head1 FILES AffyDB.pm =head1 SEE ALSO Probeset.pm =head1 AUTHOR Davide Rambaldi, IFOM-FIRC www.ifom-firc.it e-mail: filter-drambald@ifom-ieo-campus.it =head1 COPYRIGHT This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. =cut # }}}