9 Circular Plots
Jupyter interactive version:visit gitlab for installation instructions https://gitlab.com/ferroao/idiogramFISH
9.1 Example with monocen. and holocen.
{
require(idiogramFISH)
require(plyr)
dfOfChrSize$OTU <- "Species mono"
dfChrSizeHolo$OTU <- "Species holo"
monoholoCS <- plyr::rbind.fill(dfOfChrSize,dfChrSizeHolo)
dfOfMarks2$OTU <-"Species mono"
dfMarkPosHolo$OTU <-"Species holo"
monoholoMarks <- plyr::rbind.fill(dfOfMarks2,dfMarkPosHolo)
monoholoMarks[which(monoholoMarks$markName=="5S"),]$markSize<-.5
monoholoMarks[10,]$markName <- "prot"
monoholoMarks[10,]$markSize <- 1
dfMarkColor<-rbind(dfMarkColor,c("prot","black","exProtein"))
}
plotIdiograms(dfChrSize = monoholoCS, # data.frame of chr. size
dfMarkColor= dfMarkColor, # df of mark style
dfMarkPos = monoholoMarks,# df of mark positions, includes cen. marks
squareness =5, # vertices squareness
addOTUName = TRUE, # add OTU names
distTextChr = .5, # separ. among chr. and text and among chr. name and indices
chrId="original", # use original name of chr.
OTUTextSize = .7, # size of OTU name
legendHeight= 1, # height of legend labels
legendWidth = 1, # width of legend labels
# ,legend="inline"
fixCenBorder = TRUE, # use chrColor as border color of cen. or cen. marks
xlimLeftMod=1, # modify xlim left argument of plot
xlimRightMod=2, # modify xlim right argument of plot
ylimBotMod= .2 # modify ylim bottom argument of plot
,useOneDot = FALSE
# GRAPHICAL PARAMETERS FOR CIRCULAR PLOT
,circularPlot = TRUE # circularPlot
,shrinkFactor = .9 # percentage 1 = 100% of circle with chr.
,circleCenter = 3 # X coordinate of circleCenter (affects legend pos.)
,chrLabelSpacing = .9 # chr. names spacing
,OTUsrt = 0 # angle for OTU name (or number)
,OTUplacing = "number" # Use number and legend instead of name. See OTUcentered
,OTUjustif = 0 # OTU names justif. left.
,OTULabelSpacerx = -0.5 # modify position of OTU label, when OTUplacing="number" or "simple"
,OTUlegendHeight = 1.5 # space among OTU names when in legend - OTUplacing
,separFactor = 0.75
)

9.2 Recreating circular karyotype of (Golczyk et al., 2005)
# First swap short and long arms to show the same rotation of the article
listradfs<-swapChrRegionDfSizeAndMarks(traspadf,traspaMarks,c("3","6","7","9","12") )
# Create marks' characteristics
dfMarkColor5S25S<-read.table(text=" markName markColor style
5S black dots
25S white dots" , header=TRUE, stringsAsFactors=FALSE,fill=TRUE)
plotIdiograms(dfChrSize = listradfs$dfChrSize, # d.f. of chr. sizes
dfMarkPos = listradfs$dfMarkPos, # d.f. of marks' positions
dfMarkColor = dfMarkColor5S25S, # d.f. of mark characteristics
cenColor = "black", # cen. color
squareness = 5, # corner squareness
chrWidth = 1, # chr. width
orderChr = "name" # order chr. by name
,addOTUName = FALSE # do not add OTU name
,legendHeight = 3 # labels separ. y axis
# circular plot parameters
,circularPlot=TRUE
,radius=5 # basic radius
,useOneDot=FALSE # use two dots in dot marks
,chrLabelSpacing = 1 # chr. name spacing
,rotation = 0.1 # anti-clockwise start site in x*pi radians, from top (0)
,shrinkFactor = .95 # % of circle use
)

9.3 Plasmid data from genBank
Using upArrow
and downArrow
styles, clockwise and anti-clockwise, respectively.
# data from: https://www.ncbi.nlm.nih.gov/nuccore/NZ_CP009939.1
#install.packages("rentrez")
library(rentrez)
# search string
bcereus <- "Bacillus cereus strain 03BB87 plasmid pBCN, complete sequence"
bcereus_search <- rentrez::entrez_search(db="nuccore", term = bcereus)
# get summaries
esummaries<-rentrez::entrez_summary(db = "nuccore", id = bcereus_search$ids)
# download plasmid data
# From the entrez formats:
# https://www.ncbi.nlm.nih.gov/books/NBK25499/table/chapter4.T._valid_values_of__retmode_and/
# idiogramFISH can read only:
rentrezDownloadPlas <- rentrez::entrez_fetch(db="nuccore",
id = bcereus_search$ids[1],
rettype="gbwithparts",
retmode = "text")
mylist<-genBankReadIF(rentrezDownloadPlas)
# data.frames in mylist
names(mylist)
# [1] "gbdfMain" "gbdfAssemblyMeta"
# [3] "source" "gene"
# [5] "CDS"
# mylist$source
# View(mylist$gbdfMain)
# View(mylist$gbdfAssemblyMeta)
# mylist$gbdfAnnoMeta
# View(mylist$CDS)
# View(mylist$gene)
# Authors of plasmid sequence
paste(mylist$gbdfMain[which(mylist$gbdfMain$field=="AUTHORS"),][1,2] )
# [1] "Johnson,S.L., Minogue,T.D., Teshima,H., Davenport,K.W., Shea,A.A.,; Miner,H.L., Wolcott,M.J. and Chain,P.S."
# create plasmid size data data.frame
{
myPlasmiddf <- data.frame(chrName=1, chrSize=mylist$source$end)
myPlasmiddf$OTU<-mylist$gbdfMain[which(mylist$gbdfMain$field=="DEFINITION"),]$value
myPlasmiddf$OTU<-gsub(", complete sequence.","",myPlasmiddf$OTU)
# Creating mark info data.frame
mylistSel<- mylist[which(names(mylist) %in% "gene")]
mylistSelDF <- dplyr::bind_rows(mylistSel, .id="feature")
mylistSelDF$markPos <-pmin(as.numeric(mylistSelDF$begin),as.numeric(mylistSelDF$end) )
mylistSelDF$markSize<-abs(as.numeric(mylistSelDF$end)-as.numeric(mylistSelDF$begin) )
mylistSelDF$markName<-mylistSelDF$locus_tag
# orientation of arrows
mylistSelDF$style<-ifelse(mylistSelDF$isComplement,"downArrow","upArrow")
# Replace codes with names
mylistSelDF[which(!is.na(mylistSelDF$gene) ),]$markName<-
mylistSelDF[which(!is.na(mylistSelDF$gene) ),]$gene
# subset columns
marksDfPlas<-mylistSelDF[,c("markName","markPos","markSize","style"),]
# add OTU name
marksDfPlas$OTU<-myPlasmiddf$OTU
# add mandatory column
marksDfPlas$chrName<-myPlasmiddf$chrName
# organize inner arrows (downArrow) in two columns avoiding overlap
protVal <- .5 # this values (and others) must be the same
circVal <- TRUE # in plotIdiograms function
rotaVal <- 0
marksDfPlasCols<-namesToColumns(marksDfPlas, myPlasmiddf,
markType=c("downArrow"),
amountofSpaces=10,colNumber=2,
protrudingInt=1.3, protruding = protVal,
circularPlot = circVal,
rotation=rotaVal
)
# add marker for start pos.
colnames(marksDfPlasCols)
marksDfPlasCols<-rbind(marksDfPlasCols,c(paste0("START",paste0(rep(" ",0), collapse="")),1,NA,"square",myPlasmiddf$OTU,1,NA) )
# create mark general data data.frame
markStyle <- makedfMarkColorMycolors(
unique(marksDfPlasCols$markName), c("black","forestgreen","cornflowerblue") )
# arrows
markStyle$style <- marksDfPlasCols$style[match(markStyle$markName, marksDfPlasCols$markName)]
markStyle$protruding <- marksDfPlasCols$protruding[match(markStyle$markName, marksDfPlasCols$markName)]
# prefix to remove from marks
mypattern<-sub("([[:alnum:]]+_).*","\\1",trimws(marksDfPlas$markName[1]) )
}
library(idiogramFISH)
par(mar=rep(0,4), oma = rep(0,4) )
plotIdiograms(dfChrSize = myPlasmiddf, # plasmid size d.f.
dfMarkPos = marksDfPlasCols, # mark pos d.f.
dfMarkColor = markStyle, # mark style d.f.
chromatids = FALSE,
squareness = 21, # corners not rounded
chrWidth = 0.5, # chr. width
chrId="", # no chr. name
markLabelSize=.7, # font size of labels
pattern=mypattern, # remove pattern from mark names
cMBeginCenter = TRUE,
legend ="inline",
protruding= protVal,
ylimBotMod = 0, # modify plot size
ylimTopMod = 0,
xlimLeftMod = 2,
# circular params.
circularPlot = circVal, # circular
rotation=rotaVal, # begin plasmid in top
radius=2.5,
shrinkFactor = 1, # use 100% of circle
labelSpacing = 1.7, # label spacing from chr.
labelOutwards = TRUE, # label projected based on mark angle
OTUjustif = 0.5, # OTU name justif. centered.
OTUplacing = "simple" # plasmid name place. See OTUcentered
,OTUTextSize = .8 # font size of OTU name
)

9.4 Prokaryote chromosome from genBank
# Option 1: Download prokaryote genome data from:
# https://www.ncbi.nlm.nih.gov/nuccore/NC_014248.1
# Choose Customize View -> Basic Features -> genes, CDS
# Send To -> File -> Create File
# Use your file name:
dataChr.gb <- "nostoc.gb" # 5 Mbytes
# Option 2: Download with rentrez package
library(rentrez)
# search string
nostoc <- "'Nostoc azollae' 0708, complete"
nostoc_search <- rentrez::entrez_search(db="nuccore", term=nostoc)
# get summaries
esummariesNostoc<-rentrez::entrez_summary(db="nuccore", id=nostoc_search$ids)
# select only perfect matches
select<-numeric()
for (i in 1:length(esummariesNostoc)){
print(esummariesNostoc[[i]]$title)
if(esummariesNostoc[[i]]$title %in% grep(nostoc,esummariesNostoc[[i]]$title, value=T) ){
select<-c(select,i)
}
}
select
# 3 8
# download chr. data
dataChr.gb <- rentrez::entrez_fetch(db="nuccore",
id=nostoc_search$ids[select][1],
rettype="gbwithparts",
retmode = "text")
# START:
library(idiogramFISH)
mylistChr<-genBankReadIF(dataChr.gb) # 9 seconds
names(mylistChr)
# "gbdfMain" "gbdfAnnoMeta" "source" "gene" "CDS" "tRNA"
# "regulatory" "ncRNA" "rRNA" "misc_feature" "tmRNA"
# Authors of sequence
paste(mylistChr$gbdfMain[which(mylistChr$gbdfMain$field=="AUTHORS"),][1,2] )
# [1] "Ran,L., Larsson,J., Vigil-Stenman,T., Nylander,J.A., Ininbergs,K.,;
# Zheng,W.W., Lapidus,A., Lowry,S., Haselkorn,R. and Bergman,B."
# create chr. size data data.frame
# columns chrName and chrSize
myProkaryotedf <- data.frame(chrName=1, chrSize=mylistChr$source$end)
# column with OTU name
myProkaryotedf$OTU<-mylistChr$gbdfMain[which(mylistChr$gbdfMain$field=="DEFINITION"),]$value
myProkaryotedf$OTU<-gsub(", complete genome.","",myProkaryotedf$OTU)
# Creating mark info data.frame excluding some features
mylistChrSel <- mylistChr[which(names(mylistChr) %in%
setdiff( names(mylistChr) , c("gbdfMain","gbdfAnnoMeta","source","CDS") ) )]
# or:
# mylistSel<- mylistChr[which(names(mylistChr) %in% "CDS")]
# transform list into data.frame
mylistChrDF<-dplyr::bind_rows(mylistChrSel, .id="feature")
# add necessary columns
mylistChrDF$markPos <-pmin(as.numeric(mylistChrDF$begin),as.numeric(mylistChrDF$end) )
mylistChrDF$markSize<-abs(as.numeric(mylistChrDF$end)-as.numeric(mylistChrDF$begin) )
mylistChrDF$markName<-mylistChrDF$locus_tag
# Replace codes with genes, and replace NAs in markNames (locus_tag)
mylistChrDF[which(!is.na(mylistChrDF$gene) ),]$markName<-
mylistChrDF[which(!is.na(mylistChrDF$gene) ),]$gene
mylistChrDF[which(!is.na(mylistChrDF$regulatory_class) ),]$markName<-
mylistChrDF[which(!is.na(mylistChrDF$regulatory_class) ),]$regulatory_class
# make unique names, otherwise some marks may share style and color
mylistChrDF$markName<-make.uniqueIF(mylistChrDF$markName)
# when no markName and note available:
mylistChrDF[which(is.na(mylistChrDF$markName) ),]$markName<-
sub("([[:alpha:] ]+);.*","\\1", mylistChrDF[which(is.na(mylistChrDF$markName) ),]$note )
# orientation of arrows
mylistChrDF$style<-ifelse(mylistChrDF$isComplement,"downArrow","upArrow")
# select main columns for data.frame of marks' positions
marksDfChr<-mylistChrDF[,c("markName","markPos","markSize","feature","isJoin","style"),]
marksDfChr$OTU<-myProkaryotedf$OTU
# add mandatory column
marksDfChr$chrName<-myProkaryotedf$chrName
# Organize mark names in columns to avoid overlap
rotaVal<-0
marksDfChrCols<-namesToColumns(marksDfChr, myProkaryotedf,
markType=c("downArrow","upArrow"),
amountofSpaces=13,colNumber=4,
protrudingInt=0.5,
rotation = rotaVal)
{
# add marker for start pos.
colnames(marksDfChrCols)
marksDfChrCols<-rbind(marksDfChrCols
,c(" START",1,NA
,"start",FALSE,"square",myProkaryotedf$OTU,1,NA)
)
# unique(marksDfChrCols$markName)
# create mark general data data.frame
markStyleNostoc <- makedfMarkColorMycolors(
unique(marksDfChrCols$markName), c("black","forestgreen","cornflowerblue") )
unique(marksDfChrCols$feature)
# [1] "gene" "tRNA" "regulatory" "ncRNA" "rRNA" "tmRNA" "start"
unique(marksDfChrCols$isJoin)
# [1] "FALSE"
# change some colors depending on feature
markStyleNostoc[which(markStyleNostoc$markName %in%
marksDfChrCols[which(marksDfChrCols$feature %in% c("tRNA","tmRNA") ),]$markName
),]$markColor<-"magenta"
markStyleNostoc[which(markStyleNostoc$markName %in%
marksDfChrCols[which(marksDfChrCols$feature %in% c("regulatory","ncRNA") ),]$markName
),]$markColor<-"tomato3"
markStyleNostoc[which(markStyleNostoc$markName %in%
marksDfChrCols[which(marksDfChrCols$feature %in% "rRNA" ),]$markName
),]$markColor<-"red2"
markStyleNostoc[which(markStyleNostoc$markName %in%
marksDfChrCols[which(marksDfChrCols$feature %in% c("misc_binding","misc_feature" ) ),]$markName
),]$markColor<-"lightsalmon"
# or:
# When isJoin is TRUE (CDS feature included)
# markStyleNostoc[which(markStyleNostoc$markName %in%
# marksDfChrCols[which(marksDfChrCols$isJoin==TRUE),]$markName
# ),]$markColor<-"red"
# arrows info. to d.f. of charac.
markStyleNostoc$style <- marksDfChrCols$style[match(markStyleNostoc$markName, marksDfChrCols$markName)]
markStyleNostoc$protruding <- marksDfChrCols$protruding[match(markStyleNostoc$markName, marksDfChrCols$markName)]
mypattern<-sub("([[:alnum:]]+_).*","\\1",trimws(marksDfChrCols$markName[1]) )
}
png("NOSTOC.png", width=2795, height=2795) # 2.7 Mb increase size to increase resolution
# pdf("NOSTOC.pdf", width=2795/80, height=2795/80)
# svg("NOSTOC.svg", width=2795/80, height=2795/80) # 42 Mb vectorized
par(mar=rep(0,4))
plotIdiograms(dfChrSize = myProkaryotedf, # chr. data d.f.
dfMarkPos = marksDfChrCols, # mark pos d.f.
dfMarkColor = markStyleNostoc,# mark style d.f. style
squareness = 21, # corners not rounded
n=150, # number of vertices in rounded items.
markN=2,
chromatids = FALSE,
chrWidth = 4, # chr. width
lwd.chr = 0.1,
chrId="none", # no chr. name
legend="inline", # for arrows, this mimics cM and cMLeft marks
#
markLabelSize=0.25, # font size of labels
pattern= mypattern, # remove pattern from mark names
#
ylimBotMod = 5, # modify plot size
ylimTopMod = 5,
xlimLeftMod = 5,
xlimRightMod = 5,
#
# # circular plot params.
circularPlot = TRUE, # circular
shrinkFactor = 1, # use 100% of circle
labelSpacing = 1, # label spacing from chr.
rotation=rotaVal, # begin chr. in top
labelOutwards = TRUE # label projected based on mark angle
#
,OTUjustif = 0.5 # OTU name centered
,OTUplacing = "simple" # location of OTU name, see OTUcentered
,radius = 8 # radius of circle
,OTUTextSize = 3 # font size of OTU name
,cMBeginCenter = TRUE # label of arrows (inline) start in the middle
)
dev.off()