GetLepidoptera.pl

From Organic Design wiki

<perl>

#!/usr/bin/perl -w

use strict;
use warnings;
use Data::Dumper;
use Bio::DB::GenBank;

# Database handle used to stream GenBank records for a query.
my $gb = Bio::DB::GenBank->new;

# When true, progress messages are printed to STDOUT alongside the report.
my $debug = 1;

# Column separator used for the data rows of the report.
my $sep = "\t";

# Initialize query

# Build the GenBank search: 'desaturase AND Lepidoptera' against the
# 'nucleotide' database.
my $query = Bio::DB::Query::GenBank->new(
    -db    => 'nucleotide',
    -query => 'desaturase AND Lepidoptera',
);

# Report the CDS features of every record matched by $query as a
# tab-separated table: Accession, Protein_id, Product, Description.
# Records without any CDS carrying a protein_id get a single row with '-'
# placeholders. If the query matches 150 records or more, only the total
# is printed and the script exits without fetching anything.

# Ask for the hit count once and reuse it below.
my $record_count = $query->count;

if ($record_count >= 150) {
    # Too many hits: report the total and stop before streaming records.
    print "==== Total number of records:", $record_count, " ====\n";
    exit(0);
}
else {
    if ($debug) {
        print "==== Total number of records:", $record_count, " ====\n";
    }

    # Header row uses the same separator as the data rows.
    print join($sep, qw(Accession Protein_id Product Description)), "\n";

    # stream the queries in
    my $stream = $gb->get_Stream_by_query($query);
    if ($debug) {
        print "Data streamed in\n";
    }

    # grab each query
    while (my $seq = $stream->next_seq) {
        my $isprotein = 0;

        # sleep(rand(2));

        foreach my $feature ($seq->all_SeqFeatures) {
            next unless $feature->primary_tag eq 'CDS';
            next unless $feature->has_tag('protein_id');

            # A tag may hold several values; join them so they do not run
            # together in the output column.
            my $protein_id = join ',', $feature->get_tag_values('protein_id');

            # Per the BioPerl documentation, requesting a tag that is absent
            # raises an exception, so guard the optional 'product' tag and
            # fall back to the '-' placeholder.
            my $product = $feature->has_tag('product')
                ? join(',', $feature->get_tag_values('product'))
                : '-';

            print join($sep,
                $seq->accession_number, $protein_id, $product,
                $seq->description), "\n";
            $isprotein = 1;
        }

        # No protein-coding feature found: still emit one row for the record.
        if (!$isprotein) {
            print join($sep,
                $seq->accession_number, '-', '-', $seq->description), "\n";
        }
    }
} </perl>