6 Changing Units

Jupyter interactive version:

6.1 Using bases instead of micrometers - no. cen.

Create some data in millions of bases:

# library() stops with an error if the package is missing;
# require() would only return FALSE and let the script fail later
library(idiogramFISH)

# Copy the example data.frames and rescale sizes and positions
# by a factor of 98000000, so that they are expressed in bases
bigdfChrSizeHoloMb <- bigdfChrSizeHolo # included in idiogramFISH
bigdfChrSizeHoloMb$chrSize <- bigdfChrSizeHoloMb$chrSize * 98000000

bigdfMarkPosHoloMb <- bigdfMarkPosHolo
bigdfMarkPosHoloMb$markPos <- bigdfMarkPosHoloMb$markPos * 98000000
bigdfMarkPosHoloMb$markSize <- bigdfMarkPosHoloMb$markSize * 98000000

Plotting

In the plot, length is shown in Mb.

# Write the idiogram of the data in bases to a 700 x 600 PNG file
png("bigdfChrSizeHolo2.png", width = 700, height = 600)
# alternative margins: par(mar = c(1, 1, 1, 1))
par(mar = c(0, 0, 0, 0))

plotIdiograms(
  # input data.frames
  dfChrSize = bigdfChrSizeHoloMb,  # chr. sizes
  dfMarkPos = bigdfMarkPosHoloMb,  # mark positions
  dfMarkColor = dfMarkColor,       # mark styles

  # chr. and mark shapes
  markDistType = "cen",            # distance to mark is measured to its center
  squareness = 4,                  # squareness of vertices of chr. and marks
  distTextChr = 0.5,               # separation between chr. and text
  chromatids = FALSE,              # do not show chromatids

  # karyotype layout
  karHeight = 2,                   # rel. karyotype height
  karHeiSpace = 4,                 # karyotype height including spacing
  karSepar = TRUE,                 # reduce spacing among karyotypes
  amoSepar = 1,                    # amount of separation, depends on karSepar

  # names and indices
  chrId = "simple",                # chr. names, not "original"
  chrSize = TRUE,                  # show chr. size under chr.
  indexIdTextSize = 0.9,           # font size of chr. names and indices
  karIndex = FALSE,                # do not add karyotype asymmetry index
  OTUfont = 3,                     # italics

  # ruler
  rulerNumberSize = 0.9,           # font size of ruler
  rulerPos = 0,                    # position of ruler
  ruler.tck = -0.004,              # ruler tick length and orientation
  xPosRulerTitle = 3.5,            # modifies position of ruler title (Mb)

  # legend
  markLabelSize = 0.9,             # font size of legend
  legendWidth = 1.2,               # width of legends

  # plot limits
  xlimLeftMod = 1,                 # modify left argument of xlim
  ylimBotMod = 0.4                 # modify bottom argument of ylim
)
dev.off()

For another example see: https://stackoverflow.com/questions/33727432/how-to-plot-positions-along-a-chromosome-graphic/57153497#57153497

6.2 Using threshold to fix scale

The default value of 35 for threshold may shrink one of the OTUs of this example more than expected. In this case threshold must be bigger.

# fig.width = 7, fig.height = 7
# Rescale the chr. sizes and the marks of the Mb example data by 33
bigdfOfChrSize3_100Mb <- bigdfOfChrSize3Mb
bigdfOfChrSize3_100Mb[["chrSize"]] <- bigdfOfChrSize3Mb[["chrSize"]] * 33

bigdfOfMarks3_100Mb <- bigdfOfMarks3Mb
bigdfOfMarks3_100Mb[["markPos"]] <- bigdfOfMarks3_100Mb[["markPos"]] * 33
bigdfOfMarks3_100Mb[["markSize"]] <- bigdfOfMarks3_100Mb[["markSize"]] * 33

par(mar = c(0, 0, 0, 0))
plotIdiograms(
  # input data.frames
  dfChrSize = bigdfOfChrSize3_100Mb,  # chr. sizes
  dfMarkPos = bigdfOfMarks3_100Mb,    # mark positions

  # chr. and karyotype dimensions
  chrWidth = 0.6,             # width of chr.
  chrSpacing = 0.6,           # space among chr.
  karHeight = 3,              # kar. height without interspace
  karHeiSpace = 5,            # vertical size of karyotype including spacer
  amoSepar = 2,               # separation among kar.

  # text
  indexIdTextSize = 0.6,      # font size of chr. name and indices
  markLabelSize = 0.7,        # font size of mark legends
  distTextChr = 0.65,         # separation among chr. names and indices

  fixCenBorder = TRUE,        # use chrColor as border color of cen. or cen. marks
  legendWidth = 1.5,          # legend items width

  # ruler
  xPosRulerTitle = 3.5,       # position of Mb (title) in ruler
  rulerPos = 0,               # ruler position
  ruler.tck = -0.005,         # size and orientation of ruler ticks
  rulerNumberPos = 0.7,       # position of numbers in ruler
  rulerNumberSize = 0.7,      # font size of ruler numbers
  rulerInterval = 1.5,        # ruler interval for micrometers
  rulerIntervalMb = 50,       # ruler interval for Mb

  # plot limits
  ylimBotMod = 0.4,           # modify ylim bottom argument
  ylimTopMod = 0,             # modify ylim top argument
  chromatids = FALSE,         # do not show chromatids

  ####  NEW    #####
  threshold = 90              # avoids shrinking data greater than 350 Mb
)

6.3 Plot data in micrometers and bases

Information in number of bases can be combined in the same plot with information in micrometers.

Here, the new mark style cenStyle is used to add centromeres to the “holocen.” karyotypes (genomes).

To make the rulers fit better, with less excess length beyond the chromosomes, use ceilingFactor.

# fig.width = 10, fig.height = 10
# Scale the Mb example data by 100 (first 8 chr. only for the sizes)
bigdfOfChrSize3_100Mb <- bigdfOfChrSize3Mb[1:8, ]
bigdfOfChrSize3_100Mb[["chrSize"]] <- bigdfOfChrSize3_100Mb[["chrSize"]] * 100

bigdfOfMarks3_100Mb <- bigdfOfMarks3Mb
bigdfOfMarks3_100Mb[["markPos"]] <- bigdfOfMarks3_100Mb[["markPos"]] * 100
bigdfOfMarks3_100Mb[["markSize"]] <- bigdfOfMarks3_100Mb[["markSize"]] * 100

# Chr. sizes: stack the data in micrometers on top of the data in bases,
# then sort the rows by OTU name
mixedThreeSpChrSize <- plyr::rbind.fill(bigdfOfChrSize[1:8, ], bigdfOfChrSize3_100Mb)
mixedThreeSpChrSize <- mixedThreeSpChrSize[order(mixedThreeSpChrSize[["OTU"]]), ]

# Simulated centromeres for the karyotypes in bases (holocen.):
# one mark per chr., positioned at half of the chr. size (compare rulers)
bigdfSimCenMarks <- bigdfOfChrSize3_100Mb
bigdfSimCenMarks[["markPos"]] <- bigdfSimCenMarks[["chrSize"]] / 2
bigdfSimCenMarks[["markName"]] <- "sim. cen."
bigdfSimCenMarks[["chrSize"]] <- NULL # not a column of a mark data.frame

# Marks: micrometers, bases and simulated centromeres in one data.frame
mixedThreeSpMarks <- plyr::rbind.fill(bigdfOfMarks, bigdfOfMarks3_100Mb, bigdfSimCenMarks)

# Drop the simulated centromeres of chr. 1 and 4 of "Species 2 genome"
# (%in% never yields NA, so the logical mask can index the rows directly)
mixedThreeSpMarks <- mixedThreeSpMarks[
  !(mixedThreeSpMarks$OTU %in% "Species 2 genome" &
    mixedThreeSpMarks$chrName %in% c(1, 4) &
    mixedThreeSpMarks$markName %in% "sim. cen."),
]

# Rename the remaining marks of those chr. as constriction marks
mixedThreeSpMarks[
  mixedThreeSpMarks$OTU %in% "Species 2 genome" &
    mixedThreeSpMarks$chrName %in% c(1, 4),
]$markName <- c("cDAPI", "cCMA")

# Arrow mark: duplicate the last row, then overwrite name, position and size
mixedThreeSpMarks <- dplyr::bind_rows(
  mixedThreeSpMarks,
  mixedThreeSpMarks[nrow(mixedThreeSpMarks), ]
)
mixedThreeSpMarks[nrow(mixedThreeSpMarks), "markName"] <- "S58A"
mixedThreeSpMarks[nrow(mixedThreeSpMarks), "markPos"] <- 7e+07
mixedThreeSpMarks[nrow(mixedThreeSpMarks), "markSize"] <- 7e+07

# Build the data.frame of mark colors and styles, one row per mark name
dfMarkColorAndStyle <- makedfMarkColorMycolors(
  unique(mixedThreeSpMarks$markName),
  c("red", "chartreuse3", "dodgerblue", "darkgoldenrod1", "dodgerblue", "darkgoldenrod1", "black")
)

# d.f. of marks' styles
# Rows are selected by mark name instead of by position, so the result does
# not depend on the order in which unique() returns the mark names

# constriction marks and simulated centromeres use cenStyle
dfMarkColorAndStyle$style[
  dfMarkColorAndStyle$markName %in% c("cDAPI", "cCMA", "sim. cen.")
] <- "cenStyle"
# the simulated centromere gets no color
dfMarkColorAndStyle$markColor[dfMarkColorAndStyle$markName %in% "sim. cen."] <- NA
# the added S58A mark is drawn as an arrow
dfMarkColorAndStyle$style[dfMarkColorAndStyle$markName %in% "S58A"] <- "upArrow"

dfMarkColorAndStyle
#    markName      markColor    style
# 1        5S            red     dots
# 2       45S    chartreuse3   square
# 3      DAPI     dodgerblue   square
# 4       CMA darkgoldenrod1   square
# 5     cDAPI     dodgerblue cenStyle
# 6      cCMA darkgoldenrod1 cenStyle
# 7 sim. cen.           <NA> cenStyle
# 8      S58A            red  upArrow

# Plot karyotypes in micrometers and in bases together
par(mar = c(0, 0, 0, 0))
plotIdiograms(
  # input data.frames
  dfChrSize = mixedThreeSpChrSize,    # chr. sizes
  dfMarkPos = mixedThreeSpMarks,      # mark positions
  dfMarkColor = dfMarkColorAndStyle,  # mark colors and styles

  # chr. and karyotype dimensions
  chrWidth = 0.6,             # width of chr.
  chrSpacing = 0.6,           # space among chr.
  karHeight = 3,              # kar. height without interspace
  karHeiSpace = 5,            # vertical size of karyotype including spacer
  amoSepar = 2,               # separation among kar.

  # text and lines
  indexIdTextSize = 0.6,      # font size of chr. name and indices
  markLabelSize = 0.7,        # font size of mark legends
  distTextChr = 0.65,         # separation among chr. names and indices
  lwd.mimicCen = 1.5,         # constriction line width

  legendWidth = 1.5,          # legend items width
  fixCenBorder = TRUE,        # use chrColor as border color of cen. or cen. marks

  # rulers
  xPosRulerTitle = 3.7,       # position of Mb (title) in ruler
  rulerPos = 0,               # ruler position
  ruler.tck = -0.005,         # size and orientation of ruler ticks
  rulerNumberPos = 0.7,       # position of numbers in ruler
  rulerNumberSize = 0.7,      # font size of ruler numbers
  rulerInterval = 1.5,        # ruler interval for micrometers
  rulerIntervalMb = 150,      # ruler interval for Mb
  ceilingFactor = 1,          # affects rounding for ruler max. value

  # plot limits
  ylimBotMod = 0.4,           # modify ylim bottom argument
  ylimTopMod = 0,             # modify ylim top argument

  holocenNotAsChromatids = TRUE, # do not use chromatids in holocen.
  pattern = "^c",                # regex pattern to remove from mark names
  remSimiMarkLeg = TRUE          # remove pseudoduplicate names arising from pattern removal
)

Let’s explore those data.frames

# first six rows of the merged chr. size data.frame
head(mixedThreeSpChrSize, n = 6)
OTU chrName shortArmSize longArmSize chrSize
1 Species 1 1 1.5 2.0 NA
2 Species 1 2 2.0 2.5 NA
3 Species 1 3 1.0 2.0 NA
9 Species 1 genome 1 NA NA 350000000
10 Species 1 genome 2 NA NA 450000000
11 Species 1 genome 3 NA NA 250000000
# marks of the OTUs "Species 1" and "Species 1 genome"
mixedThreeSpMarks[mixedThreeSpMarks$OTU %in% c("Species 1", "Species 1 genome"), ]
OTU chrName markName chrRegion markDistCen markSize markPos
1 Species 1 1 5S p 0.5 1 NA
2 Species 1 1 45S q 0.5 1 NA
3 Species 1 2 45S p 1.0 1 NA
4 Species 1 3 DAPI q 1.0 1 NA
13 Species 1 genome 1 5S NA NA 100000000 250000000
14 Species 1 genome 1 45S NA NA 100000000 50000000
15 Species 1 genome 2 45S NA NA 100000000 350000000
16 Species 1 genome 3 DAPI NA NA 100000000 0
25 Species 1 genome 1 sim. cen. NA NA NA 175000000
26 Species 1 genome 2 sim. cen. NA NA NA 225000000
27 Species 1 genome 3 sim. cen. NA NA NA 125000000

6.4 Use cM as units

Information in cM can be combined in the same plot with information in micrometers.

To make the rulers fit better, with less excess length beyond the chromosomes, use ceilingFactor.

# fig.width = 10, fig.height = 10
# Chr. sizes for the cM example: first 8 rows of the Mb data divided by 100000
bigdfOfChrSize3cM <- bigdfOfChrSize3Mb[1:8, ]
bigdfOfChrSize3cM$chrSize <- bigdfOfChrSize3cM$chrSize / 100000

# merge data.frames in micrometers and cM
mixedThreeSpChrSize2 <- plyr::rbind.fill(bigdfOfChrSize[1:8, ], bigdfOfChrSize3cM)

# sort by OTU name
mixedThreeSpChrSize2 <- mixedThreeSpChrSize2[order(mixedThreeSpChrSize2$OTU), ]

# create mark data in cM. The markSize col. is not necessary because style is cM
bigdfOfMarks3cM <- bigdfOfMarks3Mb
bigdfOfMarks3cM$markPos <- bigdfOfMarks3cM$markPos / 100000
bigdfOfMarks3cM$markSize <- NA
# As only the cM idiograms should be plotted with cM marks (lines),
# prefix their mark names so that they can be told apart
bigdfOfMarks3cM$markName <- paste0("cM", bigdfOfMarks3cM$markName)

# d.f. of all marks
mixedThreeSpMarks2 <- plyr::rbind.fill(bigdfOfMarks, bigdfOfMarks3cM)

# create a data.frame with mark characteristics
mixedDfMarkStyle <- makedfMarkColorMycolors(
  unique(mixedThreeSpMarks2$markName),
  c("red", "chartreuse3", "dodgerblue", "darkgoldenrod1")
)

# marks whose name contains "cM" get the "cM" style (lines): not dots, not squares.
# grepl() gives the row mask directly and is a no-op when nothing matches
mixedDfMarkStyle$style[grepl("cM", mixedDfMarkStyle$markName)] <- "cM"

# Plot karyotypes in micrometers together with karyotypes in cM
par(mar = c(0, 0, 0, 0))
plotIdiograms(
  # input data.frames
  dfChrSize = mixedThreeSpChrSize2,   # chr. sizes
  dfMarkPos = mixedThreeSpMarks2,     # mark positions
  dfMarkColor = mixedDfMarkStyle,     # mark styles

  chrWidth = 0.6,             # width of chr.
  chrSpacing = 0.7,           # space among chr.

  # OTUs with their own ruler units
  specialOTUNames = bigdfOfMarks3cM$OTU, # OTUs in this object will have different ruler units
  specialyTitle = "cM",       # ruler title for specialOTUNames
  specialChrWidth = 0.2,      # modify chr. width of OTUs in specialOTUNames
  specialChrSpacing = 1.1,    # modify chr. spacing of OTUs in specialOTUNames

  # karyotype dimensions
  karHeight = 3,              # kar. height without interspace
  karHeiSpace = 6,            # vertical size of karyotype including spacer
  amoSepar = 3,               # separation among kar.

  # text
  chrSize = TRUE,             # show chr. size under chr.
  indexIdTextSize = 0.6,      # font size of chr. name and indices
  distTextChr = 0.85,         # separation among chr. names and indices

  # marks and legend
  protruding = 1,             # extension of cM mark type
  pattern = "cM",             # regex pattern to remove from mark names
  markLabelSize = 0.7,        # font size of mark legends
  legendWidth = 2,            # legend items width
  fixCenBorder = TRUE,        # use chrColor as border color of cen. or cen. marks
  lwd.cM = 2,                 # thickness of cM marks
  holocenNotAsChromatids = TRUE, # do not use chromatids in holocen. kar.

  # rulers
  xPosRulerTitle = 3.2,       # position of Mb or cM (title) in ruler
  rulerPos = 0,               # ruler position
  ruler.tck = -0.005,         # size and orientation of ruler ticks
  rulerNumberPos = 0.7,       # position of numbers in ruler
  rulerNumberSize = 0.7,      # font size of ruler numbers
  rulerIntervalcM = 12,       # ruler interval for OTU in specialOTUNames and MbThreshold not met
  ceilingFactor = 1,          # affects max. value in ruler. See also rulerInterval

  # plot limits
  ylimBotMod = 0.4,           # modify ylim bottom argument
  ylimTopMod = 0              # modify ylim top argument
)