9 Circular Plots

Jupyter interactive version:

visit gitlab for installation instructions https://gitlab.com/ferroao/idiogramFISH

9.1 Example with monocen. and holocen.

{
  require(idiogramFISH)
  require(plyr)
  dfOfChrSize$OTU <- "Species mono"
  dfChrSizeHolo$OTU <- "Species holo"

  monoholoCS <- plyr::rbind.fill(dfOfChrSize, dfChrSizeHolo)

  dfOfMarks2$OTU <- "Species mono"
  dfMarkPosHolo$OTU <- "Species holo"

  monoholoMarks <- plyr::rbind.fill(dfOfMarks2, dfMarkPosHolo)
  monoholoMarks[which(monoholoMarks$markName == "5S"), ]$markSize <- .5

  monoholoMarks[10, ]$markName <- "prot"
  monoholoMarks[10, ]$markSize <- 1
  dfMarkColor <- rbind(dfMarkColor, c("prot", "black", "exProtein"))
}

plotIdiograms(dfChrSize  = monoholoCS,   # data.frame of chr. size
  dfMarkColor = dfMarkColor,  # df of mark style
  dfMarkPos = monoholoMarks,  # df of mark positions, includes cen. marks

  squareness = 5,             # vertices squareness
  addOTUName = TRUE,          # add OTU names
  distTextChr = .5,           # separ. among chr. and text and among chr. name and indices

  chrId = "original",         # use original name of chr.
  OTUTextSize = .7,           # size of OTU name

  legendHeight = 1,           # height of legend labels
  legendWidth = 1,            # width of legend labels
  # ,legend = "inline"
  fixCenBorder = TRUE,        # use chrColor as border color of cen. or cen. marks

  xlimLeftMod = 1,            # modify xlim left argument of plot
  xlimRightMod = 2,           # modify xlim right argument of plot
  ylimBotMod = .2             # modify ylim bottom argument of plot

  , useOneDot = FALSE

  # GRAPHICAL PARAMETERS FOR CIRCULAR PLOT

  , circularPlot = TRUE       # circularPlot
  , shrinkFactor = .9         # percentage 1 = 100% of circle with chr.
  , circleCenter = 3          # X coordinate of circleCenter (affects legend pos.)
  , chrLabelSpacing = .9      # chr. names spacing

  , OTUsrt = 0                # angle for OTU name (or number)
  , OTUplacing = "number"     # Use number and legend instead of name. See OTUcentered
  , OTUjustif = 0             # OTU names justif. left.
  , OTULabelSpacerx = -0.5    # modify position of OTU label, when OTUplacing = "number" or "simple"
  , OTUlegendHeight = 1.5     # space among OTU names when in legend - OTUplacing
  , separFactor = 0.75
)

9.2 Recreating circular karyotype of (Golczyk et al., 2005)

# First swap short and long arms to show the same rotation of the article

listradfs <- swapChrRegionDfSizeAndMarks(traspadf, traspaMarks, c("3", "6", "7", "9", "12"))

# Create marks' characteristics

dfMarkColor5S25S <- read.table(text = "    markName markColor  style
        5S       black dots
       25S       white dots",  header = TRUE, stringsAsFactors = FALSE, fill = TRUE)

plotIdiograms(dfChrSize = listradfs$dfChrSize,  # d.f. of chr. sizes
  dfMarkPos = listradfs$dfMarkPos,  # d.f. of marks' positions
  dfMarkColor = dfMarkColor5S25S,   # d.f. of mark characteristics
  cenColor  = "black",            # cen. color
  squareness = 5,                   # corner squareness
  chrWidth = 1,                     # chr. width
  orderChr = "name"                 # order chr. by name

  , addOTUName = FALSE               # do not add OTU name
  , legendHeight = 3                 # labels separ. y axis

  # circular plot parameters
  , circularPlot = TRUE,
  radius = 5                         # basic radius
  , useOneDot = FALSE                # use two dots in dot marks
  , chrLabelSpacing = 1              # chr. name spacing
  , rotation = 0.1                   # anti-clockwise start site in x*pi radians, from top (0)
  , shrinkFactor = .95               # % of circle use
)

9.3 Plasmid data from genBank

Using upArrow and downArrow styles, clockwise and anti-clockwise, respectively.


# data from: https://www.ncbi.nlm.nih.gov/nuccore/NZ_CP009939.1

# install.packages("rentrez")
library(rentrez)
# search string
bcereus <- "Bacillus cereus strain 03BB87 plasmid pBCN, complete sequence"
bcereus_search <- rentrez::entrez_search(db = "nuccore", term = bcereus)
# get summaries
esummaries <- rentrez::entrez_summary(db = "nuccore", id = bcereus_search$ids)

# download plasmid data
# From the entrez formats:
# https://www.ncbi.nlm.nih.gov/books/NBK25499/table/chapter4.T._valid_values_of__retmode_and/
# idiogramFISH can read only:
rentrezDownloadPlas  <- rentrez::entrez_fetch(db = "nuccore",
  id = bcereus_search$ids[1],
  rettype = "gbwithparts",
  retmode = "text")

mylist <- genBankReadIF(rentrezDownloadPlas)

# data.frames in mylist
names(mylist)
# [1] "gbdfMain"         "gbdfAssemblyMeta"
# [3] "source"           "gene"            
# [5] "CDS"

# mylist$source
# View(mylist$gbdfMain)
# View(mylist$gbdfAssemblyMeta)
# mylist$gbdfAnnoMeta
# View(mylist$CDS)
# View(mylist$gene)

# Authors of plasmid sequence
paste(mylist$gbdfMain[which(mylist$gbdfMain$field == "AUTHORS"), ][1, 2])
# [1] "Johnson,S.L., Minogue,T.D., Teshima,H., Davenport,K.W., Shea,A.A.,; Miner,H.L., Wolcott,M.J. and Chain,P.S."

# create plasmid size data data.frame
{
  myPlasmiddf <- data.frame(chrName = 1, chrSize = mylist$source$end)
  myPlasmiddf$OTU <- mylist$gbdfMain[which(mylist$gbdfMain$field == "DEFINITION"), ]$value
  myPlasmiddf$OTU <- gsub(", complete sequence.", "", myPlasmiddf$OTU)

  # Creating mark info data.frame

  mylistSel <- mylist[which(names(mylist) %in% "gene")]
  mylistSelDF <- dplyr::bind_rows(mylistSel, .id = "feature")

  mylistSelDF$markPos <- pmin(as.numeric(mylistSelDF$begin), as.numeric(mylistSelDF$end))
  mylistSelDF$markSize <- abs(as.numeric(mylistSelDF$end) - as.numeric(mylistSelDF$begin))
  mylistSelDF$markName <- mylistSelDF$locus_tag

  # orientation of arrows
  mylistSelDF$style <- ifelse(mylistSelDF$isComplement, "downArrow", "upArrow")

  # Replace codes with names
  mylistSelDF[which(!is.na(mylistSelDF$gene)), ]$markName <-
    mylistSelDF[which(!is.na(mylistSelDF$gene)), ]$gene

  # subset columns
  marksDfPlas <- mylistSelDF[, c("markName", "markPos", "markSize", "style"), ]

  # add OTU name
  marksDfPlas$OTU <- myPlasmiddf$OTU

  # add mandatory column
  marksDfPlas$chrName <- myPlasmiddf$chrName

  # organize inner arrows (downArrow) in two columns avoiding overlap

  protVal <- .5     # this values (and others) must be the same
  circVal <- TRUE   # in plotIdiograms function
  rotaVal <- 0

  marksDfPlasCols <- namesToColumns(marksDfPlas, myPlasmiddf,
    markType = c("downArrow"),
    amountofSpaces = 10, colNumber = 2,
    protrudingInt = 1.3, protruding = protVal,
    circularPlot = circVal,
    rotation = rotaVal
  )


  # add marker for start pos.
  colnames(marksDfPlasCols)
  marksDfPlasCols <- rbind(marksDfPlasCols, c(paste0("START", paste0(rep(" ", 0), collapse = "")), 1, NA, "square", myPlasmiddf$OTU, 1, NA))

  # create mark general data data.frame
  markStyle   <- makedfMarkColorMycolors(
    unique(marksDfPlasCols$markName), c("black", "forestgreen", "cornflowerblue"))

  # arrows
  markStyle$style      <- marksDfPlasCols$style[match(markStyle$markName, marksDfPlasCols$markName)]
  markStyle$protruding <- marksDfPlasCols$protruding[match(markStyle$markName, marksDfPlasCols$markName)]

  # prefix to remove from marks
  mypattern <- sub("([[:alnum:]]+_).*", "\\1", trimws(marksDfPlas$markName[1]))
}
library(idiogramFISH)
par(mar = rep(0, 4), oma = rep(0, 4))

plotIdiograms(dfChrSize = myPlasmiddf,  # plasmid size d.f.
  dfMarkPos = marksDfPlasCols,  # mark pos d.f.
  dfMarkColor = markStyle,      # mark style d.f.

  chromatids = FALSE,

  squareness = 21,          # corners not rounded
  chrWidth = 0.5,           # chr. width
  chrId = "",               # no chr. name

  markLabelSize = .7,       # font size of labels
  pattern = mypattern,      # remove pattern from mark names
  cMBeginCenter = TRUE,
  legend = "inline",
  protruding = protVal,

  ylimBotMod = 0,           # modify plot size
  ylimTopMod = 0,
  xlimLeftMod = 2,

  # circular params.
  circularPlot = circVal,   # circular
  rotation = rotaVal,       # begin plasmid in top

  radius = 2.5,
  shrinkFactor = 1,         # use 100% of circle
  labelSpacing = 1.7,       # label spacing from chr.
  labelOutwards = TRUE,     # label projected based on mark angle

  OTUjustif = 0.5,          # OTU name justif. centered.
  OTUplacing = "simple"     # plasmid name place. See OTUcentered
  , OTUTextSize = .8        # font size of OTU name
)

9.4 Prokaryote chromosome from genBank

# Option 1: Download prokaryote genome data from:
# https://www.ncbi.nlm.nih.gov/nuccore/NC_014248.1
# Choose Customize View -> Basic Features -> genes, CDS
# Send To -> File -> Create File

# Use your file name:
dataChr.gb <- "nostoc.gb" # 5 Mbytes

# Option 2: Download with rentrez package

library(rentrez)
# search string
nostoc <- "'Nostoc azollae' 0708, complete"
nostoc_search <- rentrez::entrez_search(db = "nuccore", term = nostoc)
# get summaries
esummariesNostoc <- rentrez::entrez_summary(db = "nuccore", id = nostoc_search$ids)
# select only perfect matches
select <- numeric()
for (i in seq_along(esummariesNostoc)) {
  print(esummariesNostoc[[i]]$title)
  if (esummariesNostoc[[i]]$title %in% grep(nostoc, esummariesNostoc[[i]]$title, value = TRUE)) {
    select <- c(select, i)
  }
}
select
# 3 8

# download chr. data
dataChr.gb  <- rentrez::entrez_fetch(db = "nuccore",
  id = nostoc_search$ids[select][1],
  rettype = "gbwithparts",
  retmode = "text")
# START:
library(idiogramFISH)
mylistChr <- genBankReadIF(dataChr.gb) # 9 seconds
names(mylistChr)
# "gbdfMain"     "gbdfAnnoMeta" "source"       "gene"         "CDS"          "tRNA"
# "regulatory"   "ncRNA"        "rRNA"         "misc_feature" "tmRNA"

# Authors of sequence
paste(mylistChr$gbdfMain[which(mylistChr$gbdfMain$field == "AUTHORS"), ][1, 2])
# [1] "Ran, L., Larsson, J., Vigil-Stenman, T., Nylander, J.A., Ininbergs, K.,;
# Zheng, W.W., Lapidus, A., Lowry, S., Haselkorn, R. and Bergman, B."

# create chr. size data data.frame
# columns chrName and chrSize
myProkaryotedf <- data.frame(chrName = 1, chrSize = mylistChr$source$end)
# column with OTU name
myProkaryotedf$OTU <- mylistChr$gbdfMain[which(mylistChr$gbdfMain$field == "DEFINITION"), ]$value
myProkaryotedf$OTU <- gsub(", complete genome.", "", myProkaryotedf$OTU)

# Creating mark info data.frame excluding some features
mylistChrSel  <- mylistChr[which(names(mylistChr) %in%
  setdiff(names(mylistChr), c("gbdfMain", "gbdfAnnoMeta", "source", "CDS")))]
# or:
# mylistSel<- mylistChr[which(names(mylistChr) %in% "CDS")]

# transform list into data.frame
mylistChrDF <- dplyr::bind_rows(mylistChrSel, .id = "feature")
# add necessary columns
mylistChrDF$markPos <- pmin(as.numeric(mylistChrDF$begin), as.numeric(mylistChrDF$end))
mylistChrDF$markSize <- abs(as.numeric(mylistChrDF$end) - as.numeric(mylistChrDF$begin))
mylistChrDF$markName <- mylistChrDF$locus_tag

# Replace codes with genes, and replace NAs in markNames (locus_tag)
mylistChrDF[which(!is.na(mylistChrDF$gene)), ]$markName <-
  mylistChrDF[which(!is.na(mylistChrDF$gene)), ]$gene

mylistChrDF[which(!is.na(mylistChrDF$regulatory_class)), ]$markName <-
  mylistChrDF[which(!is.na(mylistChrDF$regulatory_class)), ]$regulatory_class

# make unique names, otherwise some marks may share style and color
mylistChrDF$markName <- make.uniqueIF(mylistChrDF$markName)

# when no markName and note available:
mylistChrDF[which(is.na(mylistChrDF$markName)), ]$markName <-
  sub("([[:alpha:] ]+);.*", "\\1", mylistChrDF[which(is.na(mylistChrDF$markName)), ]$note)

# orientation of arrows
mylistChrDF$style <- ifelse(mylistChrDF$isComplement, "downArrow", "upArrow")

# select main columns for data.frame of marks' positions
marksDfChr <- mylistChrDF[, c("markName", "markPos", "markSize", "feature", "isJoin", "style"), ]

marksDfChr$OTU <- myProkaryotedf$OTU
# add mandatory column
marksDfChr$chrName <- myProkaryotedf$chrName

# Organize mark names in columns to avoid overlap
rotaVal <- 0
marksDfChrCols <- namesToColumns(marksDfChr, myProkaryotedf,
  markType = c("downArrow", "upArrow"),
  amountofSpaces = 13, colNumber = 4,
  protrudingInt = 0.5,
  rotation = rotaVal)

{
  # add marker for start pos.
  colnames(marksDfChrCols)
  marksDfChrCols <- rbind(marksDfChrCols,
    c("                                                           START", 1, NA,
      "start", FALSE, "square", myProkaryotedf$OTU, 1, NA)
  )

  # unique(marksDfChrCols$markName)

  # create mark general data data.frame
  markStyleNostoc   <- makedfMarkColorMycolors(
    unique(marksDfChrCols$markName), c("black", "forestgreen", "cornflowerblue"))

  unique(marksDfChrCols$feature)
  # [1] "gene"         "tRNA"         "regulatory"   "ncRNA"        "rRNA"       "tmRNA"        "start"
  unique(marksDfChrCols$isJoin)
  # [1] "FALSE"

  # change some colors depending on feature
  markStyleNostoc[which(markStyleNostoc$markName %in%
    marksDfChrCols[which(marksDfChrCols$feature %in% c("tRNA", "tmRNA")), ]$markName
  ), ]$markColor <- "magenta"

  markStyleNostoc[which(markStyleNostoc$markName %in%
    marksDfChrCols[which(marksDfChrCols$feature %in% c("regulatory", "ncRNA")), ]$markName
  ), ]$markColor <- "tomato3"

  markStyleNostoc[which(markStyleNostoc$markName %in%
    marksDfChrCols[which(marksDfChrCols$feature %in% "rRNA"), ]$markName
  ), ]$markColor <- "red2"

  markStyleNostoc[which(markStyleNostoc$markName %in%
    marksDfChrCols[which(marksDfChrCols$feature %in% c("misc_binding", "misc_feature")), ]$markName
  ), ]$markColor <- "lightsalmon"

  # or:
  # When isJoin is TRUE (CDS feature included)
  # markStyleNostoc[which(markStyleNostoc$markName %in%
  #                   marksDfChrCols[which(marksDfChrCols$isJoin==TRUE),]$markName
  # ),]$markColor<-"red"

  # arrows info. to d.f. of charac.
  markStyleNostoc$style      <- marksDfChrCols$style[match(markStyleNostoc$markName, marksDfChrCols$markName)]
  markStyleNostoc$protruding <- marksDfChrCols$protruding[match(markStyleNostoc$markName, marksDfChrCols$markName)]

  mypattern <- sub("([[:alnum:]]+_).*", "\\1", trimws(marksDfChrCols$markName[1]))
}

png("NOSTOC.png", width = 2795, height = 2795) # 2.7 Mb increase size to increase resolution
# pdf("NOSTOC.pdf",   width = 2795/80,  height = 2795/80)
# svg("NOSTOC.svg",   width = 2795/80,  height = 2795/80)  # 42 Mb vectorized

par(mar = rep(0, 4))

plotIdiograms(dfChrSize = myProkaryotedf,   # chr. data d.f.
  dfMarkPos = marksDfChrCols,    # mark pos d.f.
  dfMarkColor = markStyleNostoc, # mark style d.f. style

  squareness = 21,            # corners not rounded
  n = 150,                    # number of vertices in rounded items.
  markN = 2,
  chromatids = FALSE,

  chrWidth = 4,               # chr. width
  lwd.chr  = 0.1,
  chrId = "none",             # no chr. name
  legend = "inline",          # for arrows, this mimics cM and cMLeft marks
  #
  markLabelSize = 0.25,       # font size of labels
  pattern = mypattern,        # remove pattern from mark names
  #
  ylimBotMod = 5,             # modify plot size
  ylimTopMod = 5,
  xlimLeftMod = 5,
  xlimRightMod = 5,
  #
  # # circular plot params.
  circularPlot = TRUE,      # circular
  shrinkFactor = 1,         # use 100% of circle
  labelSpacing = 1,         # label spacing from chr.
  rotation = rotaVal,       # begin chr. in top
  labelOutwards = TRUE      # label projected based on mark angle
  #
  , OTUjustif = 0.5          # OTU name centered
  , OTUplacing = "simple"    # location of OTU name, see OTUcentered
  , radius = 8               # radius of circle
  , OTUTextSize = 3          # font size of OTU name
  , cMBeginCenter = TRUE     # label of arrows (inline) start in the middle
)
dev.off()