Visualize the annotated reference file
library(TraceQC)
ref_file <- system.file("extdata/test_data/ref","ref_hgRNA_invitro.txt",package="TraceQC")
ref <- parse_ref_file(ref_file)
plot_construct(ref,chr_per_row=50,chr_size=5)
Sequence alignment
input_file <- system.file("extdata/test_data/raw_sequence","hgRNA_example.fastq.gz",package="TraceQC")
output_file <- "./aligned_reads.txt"
sequence_alignment(input_file=input_file,ref_file=ref_file,
output_file=output_file)
Find optimal mutation cutoff using permutation
library(readr)
library(ggplot2)
library(dplyr)
sequence_permutation(ref_file,out="./seq_permutate.txt")
## [1] TRUE
seq_permutate <- read_tsv("./seq_permutate.txt")
model <- loess(score~permutate_percent,data=seq_permutate)
aligned_reads <- read_tsv("./aligned_reads.txt")
ggplot(aligned_reads) +
geom_histogram(aes(x=score,y=stat(count)/sum(count)),bins=50) +
geom_vline(xintercept=predict(model,0.4),linetype="dashed",color="red",size=2) +
xlab("alignment score") +
ylab("frequency") +
theme_classic()
Identify mutations from the aligned reads
# group the identical sequence together for faster runtime
aligned_reads <- group_by(aligned_reads,target_seq,target_ref) %>%
summarise(count=n(),score=max(score)) %>%
ungroup
abundance_cutoff <- 0
alignment_score_cutoff <- 0
mutation <- seq_to_character(aligned_reads,abundance_cutoff=abundance_cutoff,
use_CPM=FALSE,alignment_score_cutoff=alignment_score_cutoff)
Visualize mutation results
mutation_type_donut(mutation)
plot_deletion_hotspot(mutation,ref)
plot_insertion_hotspot(mutation,ref)
plot_point_substitution_hotspot(mutation,ref)