The EnsEMBL project not only provides data on genome annotation, it also maintains a powerful interface to work with this data programatically (API). The large set of modules written in Perl can for example be used to
fetch information on your favorite genomic region
get data on specific genes in different organisms
annotate the clones on your microarray
start your own gene-prediction project
There is an API in the language JAVA as well, this is not being developed further, though. An API in Ruby is being developed externally and aviabable here. Of special interest is also the BioMart system which allows complex cross-database queries in a simple interface. More information can be found on the EnsEMBL pages. For specific coding questions subscribe to the mailing list.
#connect to defined database using the DBAdaptor # (used in examples 1-8)
use Bio::EnsEMBL::DBSQL::DBAdaptor; my $db = new Bio::EnsEMBL::DBSQL::DBAdaptor( -host => 'ensembldb.ensembl.org', -dbname => 'homo_sapiens_core_42_36b', -user => 'anonymous', );
#OR connect automaticall using a the Registry # (used in example 9 ff.) #please see
http://www.ensembl.org/info/software/registry/index.html
use Bio::EnsEMBL::Registry;
Bio::EnsEMBL::Registry->load_registry_from_db(
-host => 'ensembldb.ensembl.org',
-user => 'anonymous'
);
my $chrom = "X"; my $slice_adaptor = $db->get_SliceAdaptor; my $slice = $slice_adaptor->fetch_by_region("chromosome", $chrom); print "\nhave slice of ".$slice->seq_region_name." ". $slice->seq_region_start."-".$slice->seq_region_end;
my @chromosomes; foreach my $chr ( @{ $slice_adaptor->fetch_all('chromosome') } ) { #print out information print $chr->seq_region_name.", ".$chr->start." - ".$chr->end."\n"; #store the names push @chromosomes, $chr->seq_region_name; #or work with the chromosome... }
#all genes from a slice, e.g. a chromosome # [get_slice] my @genes = @{ $slice->get_all_Genes() };
#specific gene, using EnsEMBL-ID my $gene_adaptor = $db->get_GeneAdaptor; my $gene = $gene_adaptor->fetch_by_stable_id("ENSG00000147892");
#specific gene, using gene symbol (short name) my $gene_adaptor = $db->get_GeneAdaptor; my @genes = @{$gene_adaptor->fetch_all_by_external_name("ADAMTSL1")};
use Bio::EnsEMBL::DBSQL::DBAdaptor; use Bio::EnsEMBL::Compara::DBSQL::DBAdaptor;
#use Registry file for a simple connection setup, #please see
http://www.ensembl.org/info/software/registry/index.html
use Bio::EnsEMBL::Registry;
Bio::EnsEMBL::Registry->load_registry_from_db(
-host => 'ensembldb.ensembl.org',
-user => 'anonymous'
);
#get compara adaptors
my $ma = Bio::EnsEMBL::Registry->get_adaptor(
'compara', 'compara', 'Member')
or die "\n$@\ncan't get adaptor 1.\n";
my $ha = Bio::EnsEMBL::Registry->get_adaptor(
'compara', 'compara', 'Homology')
or die "\n$@\ncan't get adaptor 2.\n";
#fetch human gene from core database
my $query_species = "Homo_sapiens";
my $gene_id = "ENSG00000147892";
#fetch source gene
my $member = $ma->fetch_by_source_stable_id(
"ENSEMBLGENE", $gene_id) or return 0;
my $sourceGenome = $member->genome_db->dbID;
print "\nsource gene ($query_species): ".$member->stable_id;
#get all homologues from other species
my $other_species = "Mus_musculus";
my $homologies = $ha->fetch_by_Member_paired_species(
$member, $other_species);
#or from all species
#my $homologies = $ha->fetch_by_Member($member);
#display all results
foreach my $homologie (@$homologies) {
foreach my $member_attrib (@{$homologie->get_all_Member_Attribute}) {
my ($newmember, $attrib) = @$member_attrib;
if ($newmember->genome_db->dbID != $sourceGenome) {
print "\nhomologue: ".$newmember->stable_id.
" / ".$newmember->taxon_id.
": ".$newmember->chr_name.
" ".$newmember->chr_start.
"-".$newmember->chr_end;
}
}
}
10. get GeneOntology term for a gene using EnsEMBL & GOApph
use Bio::EnsEMBL::DBSQL::DBAdaptor; # [connect (1)]
#use GO::AppHandle for GO logic if possible! use GO::AppHandle; my %args = ( -dbhost => 'sin.lbl.gov', -dbname => 'go', ); my $apph = GO::AppHandle->connect( \%args );
# [get_gene (5)]
#get GO infos if ( $gene->is_known ) { foreach $link ( @{ $gene->get_all_DBLinks } ) { if ( $link->database eq "GO" ) {
#show GO term print $link->display_id;
#get the ancester terms foreach my $go (@GOs1) { get_parent($go); }
} } }
#fetch all parent terms recursively sub get_parent($) { my $term = shift; my $parent_term; my $type; my $parent_terms;
$parent_terms = $apph->get_parent_terms($term); foreach $parent_term (@$parent_terms) { get_parent($parent_term); } $parent_term = $term->name(); if ( ( $parent_term ne "Gene_Ontology" ) && ( $parent_term ne "molecular_function" ) && ( $parent_term ne "cellular_component" ) && ( $parent_term ne "biological_process" ) ) { print $parent_term."(".$term->type "), "; } }
11. get GeneOntology term for a gene using EnsEMBL only