Visualize the annotated reference file

library(TraceQC)
ref_file <- system.file("extdata/test_data/ref","ref_hgRNA_invitro.txt",package="TraceQC")
ref <- parse_ref_file(ref_file)
plot_construct(ref,chr_per_row=50,chr_size=5)

Sequence alignment

input_file <- system.file("extdata/test_data/raw_sequence","hgRNA_example.fastq.gz",package="TraceQC")
output_file <- "./aligned_reads.txt"
sequence_alignment(input_file=input_file,ref_file=ref_file,
                   output_file=output_file)

Find optimal mutation cutoff using permutation

library(readr)
library(ggplot2)
library(dplyr)

sequence_permutation(ref_file,out="./seq_permutate.txt")
## [1] TRUE
seq_permutate <- read_tsv("./seq_permutate.txt")
model <- loess(score~permutate_percent,data=seq_permutate)
aligned_reads <- read_tsv("./aligned_reads.txt")
ggplot(aligned_reads) +
  geom_histogram(aes(x=score,y=stat(count)/sum(count)),bins=50) +
  geom_vline(xintercept=predict(model,0.4),linetype="dashed",color="red",size=2) +
  xlab("alignment score") +
  ylab("frequency") +
  theme_classic()

Identify mutations from the aligned reads

# group the identical sequence together for faster runtime
aligned_reads <- group_by(aligned_reads,target_seq,target_ref) %>%
  summarise(count=n(),score=max(score)) %>%
  ungroup
abundance_cutoff <- 0
alignment_score_cutoff <- 0
mutation <- seq_to_character(aligned_reads,abundance_cutoff=abundance_cutoff,
                      use_CPM=FALSE,alignment_score_cutoff=alignment_score_cutoff)

Visualize mutation results

mutation_type_donut(mutation)

plot_deletion_hotspot(mutation,ref)

plot_insertion_hotspot(mutation,ref)

plot_point_substitution_hotspot(mutation,ref)

write formatted output file

library(knitr)
out_df <- format_mutation_df(mutation,is_singlecell=FALSE)
kable(head(arrange(out_df,desc(count))))
character type start length alt count
I:S82L1->A insertion 82 1 A 2128
U:S0L0 unmutated 0 0 541
I:S81L2->AA insertion 81 2 AA 254
S:S83L1->C substitution 83 1 C 95
S:S108L1->C substitution 108 1 C 94
S:S83L1->G substitution 83 1 G 75
write_tsv(out_df,"./mutations.traceqc.txt")