DECIPHER - Gallery Example 1

Gallery - Example 1
Below is the R code to reproduce the example:

Hide output
library(DECIPHER)
> library(RSQLite)
> 
> # load sequences into a database
> fas <- system.file("extdata",
+    "50S_ribosomal_protein_L2.fas",
+    package="DECIPHER")
> dbConn <- dbConnect(SQLite(),
+    ":memory:")
> Seqs2DB(fas,
+    type="FASTA",
+    dbFile=dbConn,
+    identifier="")
Reading FASTA file from line  1 to 1e+05


317 total sequences in table Seqs.
Time difference of 0.15 secs


> 
> # identify the sequences based on genus
> x <- dbGetQuery(dbConn,
+    "select description from Seqs")$description
> ns <- unlist(lapply(strsplit(x,
+       split=" "),
+    FUN=`[`,
+    1L))
> Add2DB(myData=data.frame(identifier=ns),
+    dbFile=dbConn)
Expression:
update or replace Seqs set identifier = :identifier where row_names =
:row_names


Added to table Seqs:  "identifier".
Time difference of 0.01 secs


> 
> # align the translated sequences
> dna <- SearchDB(dbConn,
+    nameBy="identifier")
Search Expression:
select identifier, _Seqs.sequence from Seqs join _Seqs on
Seqs.row_names = _Seqs.row_names where _Seqs.row_names
in (select row_names from Seqs)


DNAStringSet of length: 317
Time difference of 0.03 secs


> AA <- AlignTranslation(dna,
+    type="AAStringSet")
Determining distance matrix based on shared 4-mers:
  |============================================| 100%


Time difference of 2.02 secs


Clustering into groups by similarity:
  |============================================| 100%


Time difference of 0.49 secs


Aligning Sequences:
  |============================================| 100%


Time difference of 5.2 secs


Determining distance matrix based on alignment:
  |============================================| 100%


Time difference of 0.36 secs


Reclustering into groups by similarity:
  |============================================| 100%


Time difference of 0.44 secs


Realigning Sequences:
  |============================================| 100%


Time difference of 5.75 secs


Refining the alignment:
  |============================================| 100%


Time difference of 0.01 secs


> Seqs2DB(AA,
+    type="AAStringSet",
+    dbFile=dbConn,
+    identifier="",
+    tblName="Aligned")
Adding 317 sequences to the database.


317 total sequences in table Aligned.
Time difference of 0.28 secs


> Add2DB(myData=data.frame(identifier=ns),
+    dbFile=dbConn,
+    tblName="Aligned")
Expression:
update or replace Aligned set identifier = :identifier where
row_names = :row_names


Added to table Aligned:  "identifier".
Time difference of 0.01 secs


> 
> # form a consensus sequence for each genus
> cons <- IdConsensus(dbConn,
+    tblName="Aligned",
+    type="AAStringSet",
+    threshold=0.5,
+    minInformation=0.5)
  |============================================| 100%


Found consensus for 70 groups.
Time difference of 1.08 secs


> Seqs2DB(cons,
+    type="DNAStringSet",
+    dbFile=dbConn,
+    identifier="",
+    tblName="Consensus")
Adding 70 sequences to the database.


70 total sequences in table Consensus.
Time difference of 0.05 secs


> aa <- SearchDB(dbConn,
+    "Consensus",
+    type="AAStringSet",
+    nameBy="description",
+    limit="40,20",
+    removeGaps="common")
Search Expression:
select description, _Consensus.sequence from
Consensus join _Consensus on Consensus.row_names =
_Consensus.row_names where _Consensus.row_names in
(select row_names from Consensus) limit 40,20


AAStringSet of length: 20
Time difference of 0.01 secs


> 
> # view the sequences in a browser
> BrowseSeqs(aa,
+    threshold=0.5,
+    minInformation=0.5,
+    colWidth=60)
> dbDisconnect(dbConn)
[1] TRUE