knitr::opts_chunk$set(dpi=200, out.width='2000px')
options(width = 2000)

plotPE = function(bed, add=FALSE, percent=FALSE, col=c('#ff000055', '#0000ff55')) {
   d = data.frame(pos=seq(min(c(bed$start, bed$end)), max(c(bed$start, bed$end))))
   total = ifelse(percent, sum(bed$count)/100, 1)
   d$starts = sapply(d$pos, function(p) sum(bed$count[bed$start == p]))/total
   d$ends = sapply(d$pos, function(p) -sum(bed$count[bed$end == p]))/total
   plotBounds(d$pos, d$starts, d$ends, col, add)
}

plotBounds = function(pos, starts, ends, width=1, col=c('#ff000055', '#0000ff55'), density=NA, border=NA, add=FALSE, percent=F) {
   if(length(pos) != length(starts) | length(pos) != length(ends)) stop('pos, starts and ends need to be same length')
   if(percent) { starts = starts/(sum(starts)+sum(ends))*100; ends = ends/(sum(starts)+sum(ends))*100 }
   if(!add) {
      plot(0, xlim=range(pos), ylim=range(c(starts, -ends)), type='n', ylab='  <- Ends    |    Starts ->', xlab='Position')
      abline(h=0)
   }
   rect(pos-width/2, 0, pos+width/2, starts, col=col[2], border=border, density=density)
   rect(pos-width/2, -ends, pos+width/2, 0, col=col[1], border=border, density=density)
}

plotBoundsBatch = function(pos, starts, ends, col=c('#ff000055', '#0000ff55'), density=NA, border=NA, start, end, pad=20, seq='', struc='', separate.ends=F, title='', file='') {
   if(length(col) < ncol(starts)) { col = rep(col, ncol(starts)) }
   if(length(density) < ncol(starts)) { density = rep(density, ncol(starts)) }
   if(length(border) < ncol(starts)) { border = rep(border, ncol(starts)) }
   if(file!='') {pdf(file, 12, 6)}
   if(separate.ends) {
      # if(file!='') {pdf(file, 12, 6)}
      layout(matrix(c(1,3,2,3), nrow=2))
      sel = start+(-pad:pad)
      s = starts[pos %in% sel,]; e = matrix(0, length(sel), ncol(s)); total = colSums(s)+colSums(e)+1; s = t(t(s)/total)*100; e = t(t(e)/total)*100;
      plot(0, type='n', xlim=c(start-pad, start+pad), ylim=range(c(s, -e), na.rm=T)*1.1, xlab='Position', ylab='<- Ends    %     Starts ->', main='5`', xaxt='n')
      axis(1, c(round(pad/5):1*-5+1, 0, 1, 1:round(pad/5)*5), rep('', 2*round(pad/5)+2), cex.axis=0.7)
      axis(1, -(pad-1):pad, rep('', 2*pad), tck=-0.01)
      mtext(paste0(rep(c('-', '+'), each=round(pad/5)+1), c(round(pad/5):1*5, 1, 1, 1:round(pad/5)*5)), 1, line=0.5, outer=F, at=c(round(pad/5):1*-5+1, -0.2, 1.2, 1:round(pad/5)*5), cex=0.8)
      mtext(seq[start:(start+pad)], at=start:(start+pad), cex=0.4)
      if(ncol(starts)>3) { abline(v=start+(-pad:pad)-0.5, col='#55555555', lty=3) }
      abline(v=c(start-.5, end+.5), col='red', lty=2)
      for(i in 1:ncol(starts)) {
         plotBounds(pos[pos %in% sel]-0.5+i/(ncol(starts)+1), s[,i], e[,i], width=1/(ncol(starts)+1), col=rep(col[i], 2), density=density[i], border=border[i], add=T)
      }
      abline(h=0)
      
      sel = (end-pad):(end+pad)
      s = matrix(0, length(sel), ncol(starts)); e = ends[pos %in% sel,]; total = colSums(s)+colSums(e)+1; s = t(t(s)/total)*100; e = t(t(e)/total)*100;
      plot(0, type='n', xlim=c(end-pad, end+pad), ylim=range(c(s, -e), na.rm=T)*1.1, xlab='Position', ylab='<- Ends    %     Starts ->', main='3`', xaxt='n')
      axis(1, end+round(-pad/5):round(pad/5)*5, rep('', 2*round(pad/5)+1), cex.axis=0.7)
      axis(1, -(pad-1):pad+end, rep('', 2*pad), tck=-0.01)
      mtext(end+round(-pad/5):round(pad/5)*5, 1, line=0.5, outer=F, at=end+round(-pad/5):round(pad/5)*5, cex=0.8)
      mtext(seq[(end-pad):end], at=(end-pad):end, cex=0.4)
      if(ncol(starts)>3) { abline(v=(end-pad):(end+pad)-0.5, col='#55555555', lty=3) }
      abline(v=c(start-.5, end+.5), col='red', lty=2)
      for(i in 1:ncol(starts)) {
         plotBounds(pos[pos %in% sel]-0.5+i/(ncol(starts)+1), s[,i], e[,i], width=1/(ncol(starts)+1), col=rep(col[i], 2), density=density[i], border=border[i], add=T)
      }
      abline(h=0)
      
      sel = (start+pad):(end-pad)
      s = starts[pos %in% sel,]; e = ends[pos %in% sel,]; total = colSums(s)+colSums(e)+1; s = t(t(s)/total)*100; e = t(t(e)/total)*100;
      plot(0, type='n', xlim=c(start+pad, end-pad), ylim=range(c(s, -e), na.rm=T)*1.1, xlab='Position', ylab='<- Ends    %     Starts ->', main='middle')
      mtext(seq[sel], at=sel, cex=0.4)
      abline(v=c(start-.5, end+.5), col='red', lty=2)
      for(i in 1:ncol(starts)) {
         plotBounds(pos[pos %in% sel]-0.5+i/(ncol(starts)+1), s[,i], e[,i], width=1/(ncol(starts)+1), col=rep(col[i], 2), density=density[i], border=border[i], add=T)
      }
      abline(h=0)
      
      par(mfrow=c(1,1))

   } else {
      par(mfrow=c(1,1))
      plot(0, type='n', xlim=c(start-pad, end+pad), ylim=c(-80, 80), xlab='Position', ylab='<- Ends    %     Starts ->', main=title)
      mtext(seq, at=start:end, cex=0.4)
      mtext(struc, line=-0.5, at=start:end, cex=0.4)
      abline(v=c(start-.5, end+.5), col='red', lty=2)
      for(i in 1:ncol(starts)) {
         plotBounds(pos-0.5+i/(ncol(starts)+1), starts[, i]/sum(starts[, i])*100, ends[,i]/sum(ends[, i])*100, width=1/(ncol(starts)+1), col=rep(col[i], 2), density=density[i], border=border[i], add=T, percent=F)
      }
      abline(h=0)
   }
   if(file!='') {dev.off()}
}

plotStacked = function(bed, add=FALSE, hide.less=0, col='blue') {
   bed = bed[order(bed$start, bed$end), ]
   bed = bed[bed$count > hide.less, ]
   bed$h = cumsum(bed$count)-bed$count
   
   if(!add) { plot(0, xlim=range(c(bed$start, bed$end)), ylim=c(0, sum(bed$count)), type='n', ylab='Reads', xlab='Position') }
   rect(bed$start-0.5, bed$h, bed$end+0.5, bed$h+bed$count, col=col, border=NA)
}

plotCoverage = function(bed, add=FALSE, percent=FALSE, col='blue', border=NA, lwd=1) {
   c = data.frame(pos=seq(min(c(bed$start, bed$end)), max(c(bed$start, bed$end))))
   c$depth = sapply(c$pos, function(p) sum(bed$count[bed$start <= p & p <= bed$end]))
   if(percent) c$depth = c$depth/max(c$depth)*100
   if(!add) { plot(0, xlim=range(c$pos), ylim=c(0, max(c$depth)), type='n', ylab='Depth [%]', xlab='Position') }
   polygon(rep(c$pos, each=2)+c(-.5, .5), rep(c$depth, each=2), col=col, border=border, lwd=lwd)
}

plotNorthern = function(bed, background=NA, lim=c(1, 500), smooth=0.2, horizontal=T, file='') {
   bed$l = bed$end-bed$start
   frags = sapply(seq(lim[1], lim[2]), function(x) sum(bed$count[bed$l == x]))
   # frags = aggregate(count ~ l, bed, sum)
   frags.smooth = smooth.spline(x=seq(lim[1], lim[2]), (frags)^(1/3), spar=smooth)
   if(file!='') {pdf(file, 12, 6)}
   if(horizontal) {
      plot(0, ylim=c(0,1), xlim=lim, type='n', ylab='', xlab='Size [nt]', yaxt='n', log='x')
      rect(frags.smooth$x, 0, frags.smooth$x+1, 1, col=rgb(0, 0, 0, abs(frags.smooth$y)/max(frags.smooth$y)), border=NA)
   } else {
      plot(0, xlim=c(0,1), ylim=lim, type='n', xlab='', ylab='Size [nt]', xaxt='n', log='y', las=2)
      rect(0, frags.smooth$x, 1, frags.smooth$x+1, col=rgb(0, 0, 0, abs(frags.smooth$y)/max(frags.smooth$y)), border=NA)
   }
   if(file!='') {dev.off()}
}

Bioinformatic Methods

All libraries (old and new) were freshly processed including quality assessment, adapter trimming and trimming of bad quality bases at the ends, using fastp v0.23.2(Chen et al. 2018). Trimmed reads were then mapped against Bacillus subtilis 168 genome (NC_000964.3, GCF_000009045.1_ASM904v1) and alignments extracted for loci of 6S-1 and -2 +/-100nt. In the first sequencing run was analyzed using segemehl v0.2.0 only. For the new run, I re-mapped all old and new data with different mapping algorithms: segemehl(Hoffmann et al. 2014), bwa(Li and Durbin 2010), bowtie2(Langmead and Salzberg 2012), STAR(Dobin et al. 2013) to compare different approaches. The newest version of segemehl v0.3.4 had problems with trimmed reads (could not read all reads in library). Since all mappers yield mostly indifferent results, I proceeded with bwa v0.7.17-r1188 mapped libraries. Only perfectly mapping paired reads were used.

Results

There are four different visual representations of the paired-end reads:

  • Fragment starts (positive y-axis) and fragment ends (negative y-axis) give a good picture which major processing positions exist.
  • Stacked fragments sorted by start position give the most complete overview of the distribution.
  • Fragment depth allows the best overview of cumulative transcript lengths. Here Black and gray outlines represent the WT coverage in run 1 and 2 respective for comparison.
  • in silico Northern-blot of fragment sizes (frequency of fragment lengths is transformed to see more low abundance fragments and sizes are smoothed to reflect gaussian gel migration pattern)

Positions are relative to the respective annotated 6S gene starting with \(0\) and might not correspond exactly to canonical 6S transcript indices! Old libraries are colored red (gray outline for WT reference) and new libraries are colored blue (black outline for WT reference).

seq = seqinr::read.fasta('data/GCF_000009045.1_ASM904v1_genomic.fna', forceDNAtolower=F)[[1]]
rna6s = read.csv('6sRNA.bed', sep='\t', col.names=c('chrom', 'start', 'end', 'name', 'score', 'strand', 'fold'), header=F)
rna6s$seq = apply(rna6s, 1, function(r) {
  s = chartr('T', 'U', toupper(paste0(seq[r[['start']]:r[['end']]], collapse='')))
  if(r[['strand']]=='-') {s = chartr('ACGTUN', 'UGCAAN', toupper(paste0(seq[r[['end']]:(r[['start']])], collapse='')))}
  strsplit(s, '')[[1]]})
  # paste0(s, collapse='')})
rna6s$length = rna6s$end - rna6s$start +1
rna6s$fold = apply(rna6s, 1, function(x) strsplit(x[['fold']], '')[[1]])
# rna6s$seq = apply(rna6s, 1, function(r) {paste0(ifelse(r[['strand']]=='-', seq[r[['end']]:r[['start']]], seq[r[['start']]:r[['end']]]), collapse='')})
# rna6s$seq = chartr('ACGTNU', 'UGCANA', paste0(toupper(rna6s$seq), collapse=''))
ds = read.csv('6sRNAse.meta.tsv', header=T, sep='\t')
# ds = subset(ds, rnase!='Y')
# ds$hue = rep(seq(0, floor(nrow(ds)/2)-1)/(nrow(ds)/2+3), each=2)
# ShortRead::countFastq(ds$fastq1)
# ShortRead::countFastq(paste0('data/', ds$name, '.R1.fq.gz'))
stats = read.csv('6sRNAse.stats.tsv', header=T, sep='\t')
fc = read.csv('data/6sRNAse.counts.fc.tsv.summary', header=T, sep='\t')
names(fc) = gsub('.bwa.sort.bam', '', names(fc))
names(fc)[grep('4erKO', names(fc))] = substr(names(fc)[grep('4erKO', names(fc))], 2, 99)
counts = read.csv('data/6sRNAse.counts.fc.tsv', header=T, sep='\t', skip=1)
names(counts) = gsub('.bwa.sort.bam', '', names(counts))
names(counts)[grep('4erKO', names(counts))] = substr(names(counts)[grep('4erKO', names(counts))], 2, 99)
stats$genomic = colSums(fc[1:13, stats$name])
stats$transcript = colSums(fc[1, stats$name])
stats$`5S` = colSums(counts[counts$Geneid %in% c('gene-BSU_rRNA_5', 'gene-BSU_rRNA_26', 'gene-BSU_rRNA_8', 'gene-BSU_rRNA_11', 'gene-BSU_rRNA_15', 'gene-BSU_rRNA_24', 'gene-BSU_rRNA_28', 'gene-BSU_rRNA_13', 'gene-BSU_rRNA_18', 'gene-BSU_rRNA_21'), stats$name])
stats$`6S1` = colSums(counts[counts$Geneid == 'gene-BSU_misc_RNA_41', stats$name])
stats$`6S2` = colSums(counts[counts$Geneid == 'gene-BSU_misc_RNA_32', stats$name])
stats$`6S1.pp` = 0
stats$`6S2.pp` = 0
pad = 15
for(run in 1:2) {
   cat(paste('\n\n## Run', run, '\n'))
   for(i in 1:nrow(rna6s)) {
      rna = rna6s[i,]
      cat(paste('\n\n###', rna$name, '\n'))
      ref1 = read.csv(paste0('data/', ds$name[ds$rnase == 'WT' & ds$rep == run], '.bwa.', rna$name, '.bed'), sep='\t', col.names=c('chrom', 'start', 'end', 'count'))
      ref1$start = ref1$start+1 - rna$start
      ref1$end = ref1$end - rna$start
      # ref1 = ref1[ref1$start >= -pad & ref1$end <= rna$length+pad, ]
      if(rna$strand=='-') {
         seq = rev(chartr('ACGTNU', 'UGCANA', seq))
         tmp = rna$length - ref1$end
         ref1$end = rna$length - ref1$start
         ref1$start = tmp
       }
      # ref1$start = -(ref1$start+1 - rna$start) + (rna$end-rna$start)
      # ref2 = read.csv(paste0('data/', ds$name[ds$rnase == 'WT' & ds$rep == 2], '.bwa.', rna$name, '.bed'), sep='\t', col.names=c('chrom', 'start', 'end', 'count'))
      # ref2$start = ref2$start+1
      d = data.frame(pos=-pad:(rna$length+pad))
      pos = d$pos
      starts = matrix(0, nrow(d), length(unique(ds$rnase[ds$rep==run])))
      ends = starts
      total1 = sum(ref1$count)/100
      d$ref1_starts = sapply(d$pos, function(p) sum(ref1$count[ref1$start == p]))
      # starts[,1] = d$ref1_starts
      d$ref1_ends = sapply(d$pos, function(p) sum(ref1$count[ref1$end == p]))
      # ends[,1] = d$ref1_ends
      # total2 = sum(ref2$count)/100
      # d$ref2_starts = sapply(d$pos, function(p) sum(ref2$count[ref2$start == p]))/total2
      # d$ref2_ends = sapply(d$pos, function(p) -sum(ref2$count[ref2$end == p]))/total2
      rnases = unique(ds$rnase[ds$rep==run])
      for(j in 1:length(rnases)) {
         r = rnases[j]
         cat(paste('\n\n####', r, '\n'))
         hue = ds$hue[ds$rnase==r & ds$rep ==run]
         bed1 = read.csv(paste0('data/', ds$name[ds$rnase == r & ds$rep == run], '.bwa.', rna$name, '.bed'), sep='\t', col.names=c('chrom', 'start', 'end', 'count'))
         if(i == 1) stats[stats$name == ds$name[ds$rnase == r & ds$rep == run], '6S1.pp'] = sum(bed1$count)
         if(i == 2) stats[stats$name == ds$name[ds$rnase == r & ds$rep == run], '6S2.pp'] = sum(bed1$count)
         
         bed1$start = bed1$start+1 - rna$start
         bed1$end = bed1$end - rna$start
         # bed1 = bed1[bed1$start >= -pad & bed1$end <= rna$length+pad, ]
         if(rna$strand=='-') {
            tmp = rna$length - bed1$end
            bed1$end = rna$length - bed1$start
            bed1$start = tmp
         }
         total1 = sum(bed1$count)/100
         d$bed1_starts = sapply(d$pos, function(p) sum(bed1$count[bed1$start == p]))
         d$bed1_ends = sapply(d$pos, function(p) sum(bed1$count[bed1$end == p]))
         starts[,j] = d$bed1_starts
         ends[,j] = d$bed1_ends
         # bed2 = read.csv(paste0('data/', ds$name[ds$rnase == r & ds$rep == 2], '.bwa.', rna$name, '.bed'), sep='\t', col.names=c('chrom', 'start', 'end', 'count'))
         # bed2$start = bed2$start+1
         # total2 = sum(bed2$count)/100
         # d$bed2_starts = sapply(d$pos, function(p) sum(bed2$count[bed2$start == p]))/total2
         # d$bed2_ends = sapply(d$pos, function(p) -sum(bed2$count[bed2$end == p]))/total2
         
         plotBoundsBatch(d$pos, d[, c(4, 2)], d[, c(5, 3)], col=c(hsv(hue, 1, 1), '#55555555'), start=1, end=rna$length, pad=pad, seq=rna$seq[[1]], struc=rna$fold[[1]], title=r, file="")#paste0('processing_', rna$name, '.', r, '.', run, '.pdf'))
         plotBoundsBatch(d$pos, d[, c(4, 2)], d[, c(5, 3)], col=c(hsv(hue, 1, 1), '#00000055'), start=1, end=rna$length, pad=pad, seq=rna$seq[[1]], struc=rna$fold[[1]], title=r, separate.ends=T, file="")#paste0('processing_', rna$name, '.', r, '.', run, '_zoom.pdf'))
         
         # pdf(file=paste0('processing_', rna$name, '.', r, '.', run, '_stack.pdf'), 12, 6)
         plot(0, type='n', xlim=c(-pad, rna$length+pad), ylim=c(0, max(sum(bed1$count), 1)), xlab='Position', ylab='Reads', main=r)
         mtext(rna$seq[[1]], at=1:rna$length, cex=0.4)
         mtext(rna$fold[[1]], 3, -0.5, at=1:rna$length, cex=0.4)
         plotStacked(bed1, T, col=hsv(hue, 1, 1))
         abline(v=c(.5, rna$length+.5), col='red', lty=2)
         # dev.off()
         
         # pdf(file=paste0('processing_', rna$name, '.', r, '.', run, '_coverage.pdf'), 12, 6)
         plot(0, type='n', xlim=c(-pad, rna$length+pad), ylim=c(0, 100), xlab='Position', ylab='Coverage', main=r)
         mtext(rna$seq[[1]], at=1:rna$length, cex=0.4)
         # plotCoverage(bed2, T, T, col=hsv(hue, s=0.8, v=1, alpha=0.5))
         plotCoverage(bed1, T, T, col=hsv(hue, s=1, v=1, alpha=1))
         # plotCoverage(ref2, T, T, col=NA, border='black')
         plotCoverage(ref1, T, T, col=NA, border='black')
         abline(v=c(.5, rna$length+.5), col='red', lty=2)
         legend('topright', legend=c(r, 'WT'), fill=c(hsv(hue, s=1, v=1, alpha=1), 'black'), cex=0.5)
         # dev.off()
         # d$diff_starts = d$bed1_starts-d$ref1_starts
         # d$diff_starts_plus = d$diff_starts; d$diff_starts_plus[d$diff_starts<0] = 0
         # d$diff_starts_minus = -d$diff_starts; d$diff_starts_minus[d$diff_starts>0] = 0
         # d$diff_ends = d$bed1_ends-d$ref1_ends
         # d$diff_ends_plus = -d$diff_ends; d$diff_ends_plus[d$diff_ends>0] = 0
         # d$diff_ends_minus = d$diff_ends; d$diff_ends_minus[d$diff_ends<0] = 0
         # plotBoundsBatch(d$pos, d[, c('diff_starts_plus', 'diff_starts_minus')], d[, c('diff_ends_plus', 'diff_ends_minus')], col=c('#ff000055', '#0000ff55'), density=c(NA, NA), start=1, end=rna$length, pad=pad, seq=rna$seq[[1]], title=paste(r, 'diff'), border=c(NA, NA))
         # plotBoundsBatch(d$pos, d[, c('diff_starts_plus', 'diff_starts_minus')], d[, c('diff_ends_plus', 'diff_ends_minus')], col=c('#ff000055', '#0000ff55'), density=c(NA, NA), start=1, end=rna$length, pad=pad, seq=rna$seq[[1]], title=paste(r, 'diff'), separate.ends=T, border=c(NA, NA))
         # 
      }
      
      cat(paste('\n\n#### All at once\n'))
      plotBoundsBatch(pos, starts[, ds$show[ds$rep==run]], ends[, ds$show[ds$rep==run]], col=c(hsv(ds$hue[ds$rep==run & ds$show], 1, 1)), start=1, end=rna$length, pad=pad, seq=rna$seq[[1]], struc=rna$fold[[1]], separate.ends=T, file="")#paste0('processing_', rna$name, '.all.', run, '.pdf'))
      
      cat(paste('\n\n#### *in silico* Northern blot\n'))
      # pdf(file=paste0('processing_', rna$name, '.all.', run, '_northern.pdf'), 12, 6)
      par(mfrow=c(1, length(unique(ds$rnase[ds$rep==run]))), mar=c(3,6,2,1))
      for(r in unique(ds$rnase[ds$rep==run])) {
         bed1 = read.csv(paste0('data/', ds$name[ds$rnase == r & ds$rep == run], '.bwa.', rna$name, '.bed'), sep='\t', col.names=c('chrom', 'start', 'end', 'count'))
         # cat(paste0(r, '\t', sum(bed1$count), '\n'))
         plotNorthern(bed1, lim=c(30, 250), horizontal=F, smooth=0.2)
         title(r)
         # mtext(paste0('# Reads\n', format(sum(bed1$count), big.mark=',')), 1, 2, cex=0.8)
      }
      # dev.off()
   }
}

Run 1

6S1

WT

J1

PH

Y

4erKO

All at once

in silico Northern blot

6S2

WT

J1

PH

Y

4erKO

All at once

in silico Northern blot

Run 2

6S1

WT

J1

PH

Ystat1.1

Ystat1.2

Ystat2.1

Ystat2.2

4erKO

All at once

in silico Northern blot

6S2

WT

J1

PH

Ystat1.1

Ystat1.2

Ystat2.1

Ystat2.2

4erKO

All at once

in silico Northern blot

Sequencing library statistics

Number of reads per library: from number or raw read pairs, pairs after trimming, fragments mapping to the genome, fragments mapping to annotated features, and of these mapping to 5S-RNA, 6S1-RNA or 6S2-RNA. Fragments are considered as uniquely mapping read pairs or just one mate in the pair if the other did not map unambiguously. The number of fragments per S-RNA can differ (more) from the number or perfectly mapping read pairs (*.pp) displayed in the last columns (fewer, this number should be considered).

knitr::kable(stats, format.args = list(big.mark = ","), align = 'r')
name raw trimmed genomic transcript 5S 6S1 6S2 6S1.pp 6S2.pp
WT.1 6,580,880 4,982,010 4,825,055 4,311,658 4,385 802,053 33,652 789,669 30,107
WT.2 21,664,115 20,041,648 19,848,718 17,213,610 1,623 3,514,326 176,661 3,486,982 174,239
J1.1 7,290,021 6,720,714 6,637,122 5,649,583 2,809 177,604 164,767 167,968 154,747
J1.2 25,422,926 24,172,587 23,871,200 20,656,492 1,682 740,846 1,392,131 733,118 1,386,459
PH.1 6,707,789 5,259,573 5,040,009 4,586,486 5,132 879,357 34,002 868,814 30,647
PH.2 22,429,417 20,871,978 20,592,081 15,795,439 3,707 1,548,235 240,131 1,537,289 237,748
Y.1 5,756,837 4,938,886 4,578,539 3,748,380 16,033 172,739 161,351 167,602 136,064
Ystat1.1 18,132,670 16,977,213 16,096,996 10,441,515 2,895 416,757 1,331,483 412,993 1,328,649
Ystat1.2 19,388,752 18,131,582 17,169,115 7,606,096 2,987 143,929 295,850 39,028 118,891
Ystat2.1 20,446,444 18,854,988 17,097,921 13,349,516 1,996 1,088,740 110,040 1,081,392 108,559
Ystat2.2 16,760,983 15,330,916 14,906,531 11,225,937 1,816 479,915 42,016 470,954 40,400
4erKO.1 6,027,032 5,390,962 5,298,028 4,596,826 2,157 604,098 14,096 593,391 12,060
4erKO.2 21,671,635 20,143,627 19,646,434 17,251,975 3,580 882,693 45,073 873,928 43,298
Chen, Shifu, Yanqing Zhou, Yaru Chen, and Jia Gu. 2018. fastp: an ultra-fast all-in-one FASTQ preprocessor.” Bioinformatics 34 (17): i884–90. https://doi.org/10.1093/bioinformatics/bty560.
Dobin, Alexander, Carrie A. Davis, Felix Schlesinger, Jorg Drenkow, Chris Zaleski, Sonali Jha, Philippe Batut, Mark Chaisson, and Thomas R. Gingeras. 2013. STAR: ultrafast universal RNA-seq aligner.” Bioinformatics 29 (1): 15–21. https://doi.org/10.1093/bioinformatics/bts635.
Hoffmann, Steve, Christian Otto, Gero Doose, Andrea Tanzer, David Langenberger, Sabina Christ, Manfred Kunz, et al. 2014. A multi-split mapping algorithm for circular RNA, splicing, trans-splicing and fusion detection.” Genome Biol. 15 (2): 1–11. https://doi.org/10.1186/gb-2014-15-2-r34.
Langmead, Ben, and Steven L. Salzberg. 2012. Fast gapped-read alignment with Bowtie 2.” Nat. Methods 9 (April): 357–59. https://doi.org/10.1038/nmeth.1923.
Li, Heng, and Richard Durbin. 2010. Fast and accurate long-read alignment with BurrowsWheeler transform.” Bioinformatics 26 (5): 589–95. https://doi.org/10.1093/bioinformatics/btp698.