Visualize the annotated reference file
library(TraceQC)
ref_file <- system.file("extdata/test_data/ref","ref_carlin.txt",package="TraceQC")
ref <- parse_ref_file(ref_file)
plot_construct(ref,chr_per_row=50,chr_size=5)
Sequence alignment
input_file <- system.file("extdata/test_data/raw_sequence","carlin_example.bam",package="TraceQC")
output_file <- "./aligned_reads.txt"
sequence_alignment_for_10x(input_file=input_file,ref_file=ref_file,
output_file=output_file)
Find optimal mutation cutoff using permutation
library(readr)
library(ggplot2)
library(dplyr)
sequence_permutation(ref_file,out="./seq_permutate.txt")
## [1] TRUE
seq_permutate <- read_tsv("./seq_permutate.txt")
model <- loess(score~permutate_percent,data=seq_permutate)
aligned_reads <- read_tsv("./aligned_reads.txt")
ggplot(aligned_reads) +
geom_histogram(aes(x=score,y=stat(count)/sum(count)),bins=50) +
geom_vline(xintercept=predict(model,0.4),linetype="dashed",color="red",size=2) +
xlab("alignment score") +
ylab("frequency") +
theme_classic()
Identify mutations from the aligned reads
# group the identical sequence together for faster runtime
unique_sequences <- group_by(aligned_reads,target_seq,target_ref) %>%
summarise(score=max(score)) %>%
ungroup
alignment_score_cutoff <- predict(model,0.4)
mutation <- seq_to_character(unique_sequences,use_CPM=FALSE,
alignment_score_cutoff=alignment_score_cutoff)
write_tsv(mutation,"./mutation.txt")
mutation <- read_tsv("./mutation.txt")
Merge mutations based on UMI and cell barcodes
mutation <- left_join(mutation,
select(aligned_reads,target_seq,CB,UB))
rc_per_UB <- get_read_count_per_UB(aligned_reads)
mutation <- left_join(mutation,rc_per_UB)
# select mutations appear in more than 50% reads in each UMI
mutation <- filter_mutations_per_UMI(mutation,include_max=FALSE,freq_threshold=0.5)
# select mutations appear in more than 50% UMI in each CB
ub_per_cell <- get_UMI_count_per_CB(aligned_reads)
mutation <- left_join(mutation,ub_per_cell)
mutation <- filter_mutations_per_cell(mutation,freq_threshold=0.5)
Visualize mutation results
mutation_type_donut(mutation)
plot_deletion_hotspot(mutation,ref,use_log_count = FALSE)
plot_insertion_hotspot(mutation,ref,use_log_count = FALSE)
plot_point_substitution_hotspot(mutation,ref)