#!/usr/bin/perl -w use strict; use DBI; my $in = $ARGV[0]; my $out = $ARGV[1]; my $go_db_name = $ARGV[2]; my $go_db_username = $ARGV[3]; my $go_db_password = $ARGV[4]; # Major categories of GO terms # ---------------------------- # Processes my @processes = ("metabolic process", "electron transport", "photosynthesis", "cell cycle", "cellular component organization and biogenesis", "cell communication", "cellular response to stimulus", "localization"); # Compnents my @components = ("nucleus", "mitochondrion", "plastid", "endoplasmic reticulum", "cytosol", "ribosome", "chromosome", "cytoskeleton", "membrane", "Golgi apparatus", "lysosome"); # Functions my @functions = ("catalytic activity", "enzyme regulator activity", "signal transducer activity", "antioxidant activity", "structural molecule activity", "transporter activity", "transcription regulator activity", "translation regulator activity", "DNA binding", "RNA binding", "nucleotide binding"); my $unknown = "_UNKNOWN_"; my $others = "_OTHERS_"; open IN, "< $in" or die "Error opening input file $in: $!"; open OUT, "> $out" or die "Error opening output file $out: $!"; # Flush immediately to the output file (i.e. no buffering) my $old_fh = select(OUT); $| = 1; select($old_fh); # Connect to the database that holds the schema and data of GO my $dbh = DBI->connect("dbi:mysql:$go_db_name",$go_db_username, $go_db_password) or die "Error connecting to GO database:", $DBI::errstr; my $line; my $index = 0; while ($line = ) { chomp($line); if ($line =~ m/^([^\s]+)\s+([^\s]+)\s+([^\s]+)$/) { $index++; print "$index\t$1\t$2\t$3\n"; my $id = $1; my $db = $2; my $key = $3; print OUT $id; my $name = get_go($db, $key, 'biological_process', @processes); print OUT "\t", $name; $name = get_go($db, $key, 'cellular_component', @components); print OUT "\t", $name; $name = get_go($db, $key, 'molecular_function', @functions); print OUT "\t", $name; print OUT "\n"; } } $dbh->disconnect(); close(OUT); sub get_go { my ($db, $key, $type, @parents) = @_; my $sql = qq { select term.id, term.name from dbxref inner join gene_product on (dbxref.id = gene_product.dbxref_id) inner join association on (association.gene_product_id=gene_product.id) inner join term on (association.term_id = term.id) where dbxref.xref_dbname = '$db' and dbxref.xref_key = '$key' and term.term_type = '$type' and term.name <> '$type' }; my $sth = $dbh->prepare($sql) or die "Error prepare: ", $dbh->errstr; $sth->execute or die "Error execute: ", $dbh->errstr; my $id = -1; my $name = $unknown; $sth->bind_columns(\$id, \$name); if ($sth->fetch()) { $name = get_parent($id, @parents); } $sth->finish(); $name =~ tr/ /_/; return $name; } sub get_parent { my ($child, @list) = @_; my $parent = $others; my $found = 0; for (my $i = 0; $i < @list && !$found; $i++) { my $item = $list[$i]; if (belongs($child, $item)) { $parent = $item; $found = 1; } } return $parent; } sub belongs { my ($child, $parent) = @_; my $sql = qq { select p.id from graph_path inner join term as t on (t.id = graph_path.term2_id) inner join term as p on (p.id = graph_path.term1_id) where t.id = $child and p.id in (select term.id from term where name = '$parent') }; my $sth = $dbh->prepare($sql) or die "Error prepare: ", $dbh->errstr; $sth->execute or die "Error execute: ", $dbh->errstr; my $val = 0; if ($sth->fetch()) { return $val = 1; } $sth->finish(); return $val; }