`Cc:
`From:
`Sent:
`Subject:
`
`Christina Fan[chfan@stanford.edu]
`Stephen Quake[quake@stanford.edu]
`Yair Blumenfeld
`Wed 5/28/2008 8:13:32 PM
`Re: sequencing analysis
`
`Awesome!
`
`Quoting Christina Fan <chfan@stanford.edu>:
`
`>Hi Steve, Yair,
`>
`> I am attaching a graph of the preliminary analysis of the full
`> sequencing run.
`>
`>The run includes 4 T21 and 3 normal samples.
`>The total number of reads per sample was about 9 million, of which
`>about half (4.4 million) on average are useful for analysis.
`> Pay attention to the distribution of chr21.
`>
`>Christina
`
`<?xml version="1.0" encoding="UTF-8"?>
`<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN"
`"http://www.apple.com/DTDs/PropertyList-1.0.dtd">
`<plist version="1.0">
`<dict>
`<key>date-sent</key>
`<real>1212005612</real>
`<key>flags</key>
`<integer>8590195969</integer>
`<key>original-mailbox</key>
`<string>imap://h.christina.fcim@imap.gmail.com/INBOX</string>
`<key>remote-id</key>
`<string>3992</string>
`<key>subject</key>
`<string>Re: sequencing analysis</string>
`</dict>
`</plist>
`
`STANFORD EXHIBIT 2127
`SEQUENOM v. STANFORD
`CASE IPR2013-00390
`
`
`
`.
`
`..
`
`P1, P6, P53: 47 XX +21 (red triangle)
`P52: 47 XY +21 (red circle)
`P26, P40, P42: 46 XY (blue circle)
`
`Coverage: # 25bp reads mapped to a chromosome / size of the chromosome
`
`Average number of total useful reads per sample: 4.4 million
`
`1.4
`
`1.3
`
`1.2
`
`1.1
`
`0.9
`
`0.8
`
`0.7
`
`0.6
`
`I
`
`I
`
`• I
`
`I
`I
`
`I
`
`I
`
`I I I
`•
`
`~-
`
`A A
`I A
`
`I a •
`•
`• i A
`..
`•
`•
`
`I
`
`•
`
`••
`
`I
`
`I A A
`...
`
`•
`• I A
`•
`• •
`•
`
`' ! •
`•
`
`A P1
`A P53
`e P26
`e P40
`e P42
`A P6
`e P52
`
`1
`
`2
`
`3
`
`4
`
`5
`
`6
`
`7
`
`8
`
`9
`
`10 11 12 13 14 15 16 17 18 19 20 21 22 X Y
`chr
`
`
`
`% look at the distribution of reads across each chromosome
`% ignore golden path
`
`% Chromosome labels: chr1..chr22, then X and Y (24 entries). Every label ends
`% in delimiter text; the code that follows removes the final character with
`% chrname{i}(1:(end-1)) before using a label in file names and plot titles.
`% NOTE(review): the characters inside the quotes follow the printed listing;
`% confirm them against the names of the coordinate files on disk.
`chrname={'chr1, ', 'chr2, ', 'chr3, ', 'chr4, ', 'chr5, ', 'chr6, ', ...
`    'chr7, ', 'chr8, ', 'chr9, ', 'chr10, ', 'chr11, ', 'chr12, ', ...
`    'chr13, ', 'chr14, ', 'chr15, ', 'chr16, ', 'chr17, ', 'chr18, ', ...
`    'chr19, ', 'chr20, ', 'chr21, ', 'chr22, ', 'chrX, ', 'chrY, '};
`
`% Chromosome lengths as a 24x1 column, indexed in the same order as chrname.
`chrlength=[247249719   % chr1
`    242951149          % chr2
`    199501827          % chr3
`    191273063          % chr4
`    180857866          % chr5
`    170899992          % chr6
`    158821424          % chr7
`    146274826          % chr8
`    140273252          % chr9
`    135374737          % chr10
`    134452384          % chr11
`    132349534          % chr12
`    114142980          % chr13
`    106368585          % chr14
`    100338915          % chr15
`    88827254           % chr16
`    78774742           % chr17
`    76117153           % chr18
`    63811651           % chr19
`    62435964           % chr20
`    46944323           % chr21
`    49691432           % chr22
`    154913754          % chrX
`    57772954];         % chrY
`
`% gapbegin and gapend are indexed below as per-chromosome cell arrays.
`% NOTE(review): presumably both are stored in gapregions.mat -- confirm.
`load gapregions
`
`%noreadregioncount=cell(24,1);
`%noreadregion=cell(24,1);
`
`%X=cell(24,1);
`
`% n{i} receives the per-20kb read counts of chromosome i. It is allocated as
`% a 1-by-N cell row, the shape it takes when grown by n{i}=... in a loop.
`n=cell(1,length(chrname));
`
`for i=1:length(chrname)
`    % NOTE(review): chr_coord is never assigned in this script, so it is
`    % presumably a variable stored in the loaded file -- confirm.
`    filename=sprintf('%s_coord_U0U1',chrname{i}(1:(end-1)));
`    load (filename);
`
`    % Plot 1: number of reads in each 50000-wide bin along the chromosome.
`    bin=[0:50000:chrlength(i)];
`    bar(bin, histc(chr_coord, bin));
`    title(chrname{i}(1:(end-1)));
`    xlabel('chr coordinate');
`    ylabel('frequency of reads');
`    figurefilename=sprintf('%s_distrib.bmp',chrname{i}(1:(end-1)));
`    saveas(gcf, figurefilename)
`
`    % Counts per 20000-wide bin, taken from the stretch before the first gap
`    % and from each stretch between two consecutive gaps.
`    % NOTE(review): the stretch from gapend{i}(end) to chrlength(i) is not
`    % binned here -- confirm that leaving it out is intended.
`    bin=[0:20000:gapbegin{i}(1)];
`    readcount=histc(chr_coord, bin);
`    for j=1:(length(gapbegin{i})-1)
`        bin=[gapend{i}(j):20000:gapbegin{i}(j+1)];
`        readcount=[readcount; histc(chr_coord, bin)];
`    end
`    n{i}=readcount;
`
`    % Plot 2: how many bins hold 0, 1, ..., 100 reads.
`    % NOTE(review): the x axis carries count values and the y axis a number
`    % of bins, so the two labels below look swapped or copied from plot 1.
`    % They are left unchanged because they end up in the saved figures.
`    bar( [0: 1: 100], histc( readcount, [0: 1: 100]));
`    title(chrname{i}(1:(end-1)));
`    xlabel('chr coordinate');
`    ylabel('# reads per 20kb');
`    figurefilename=sprintf('%s_hist.bmp',chrname{i}(1:(end-1)));
`    saveas(gcf, figurefilename)
`    %X{i}=[1:50000:chrlength(i)];
`    %readcount{i}=histc(read_chr, X{i});
`    %distrib=histc(read_chr, X);
`    %noread=find(distrib==0);
`    %noreadcoord=X(noread);
`    %Y=[1:50000:chrlength(i)];
`    %[noreadregioncount{i} noreadregion{i}]=hist(noreadcoord,Y);
`    %figure, hist(noreadcoord,Y);
`    %title(chrname{i}(1:(end-1)));
`    %xlabel('chr coordinate');
`    %ylabel('frequency of reads');
`
`end
`
`% Median bin count per chromosome; the loop bound follows chrname instead of
`% a literal 24 so the two cannot drift apart.
`nmed=zeros(1,length(chrname));
`for i=1:length(chrname)
`    nmed(i)=median(n{i});
`
`end
`
`% Only n is written to readdistrib.mat; nmed stays in the workspace.
`save readdistrib n
`
`% for i=1:length(chrname)
`%     tolerance=3*sqrt(median(readcount{i}));
`%     [b c]=sort(readcount{i});
`%     a=bin{i}(c);
`%     outlier_region_U{i}=a(find(b>median(b)+tolerance));
`%     outlier_region_L{i}=a(find(b<median(b)-tolerance));
`%     outlier_count_L{i}=b(find(b<median(b)-tolerance));
`%     outlier_count_U{i}=b(find(b>median(b)+tolerance));
`%     mostoutlierU_count{i}=flipud(outlier_count_U{i}((end-9):end));
`%     mostoutlierU_region{i}=flipud(outlier_region_U{i}((end-9):end)');
`% end
`%
`%% save outlierU outlier_region_U outlier_count_U mostoutlierU_count mostoutlierU_region
`%% save outlierl outlier_region_L outlier_count_L
`%
`%
`% % write coordinates of underrepresented regions to file
`% fid=fopen('underrepresentedregion.txt', 'wt');
`%
`%
`%for i=1:length(chrname)-1
`%
`%     outlier_region_up=outlier_region_L{i}+50000;
`%
`%     for j=1:length(outlier_region_up)
`%         if outlier_region_L{i}(j)<chrlength(i)
`%
`%             if outlier_region_up(j)>=chrlength(i)
`%                 outlier_region_up(j)=chrlength(i)-1;
`%
`%             end
`%
`%             fprintf(fid, '%s\t',chrname{i}(1:(end-1)));
`%             fprintf(fid, '%d\t', outlier_region_L{i}(j));
`%             fprintf(fid, '%d\t', outlier_region_up(j));
`%             fprintf(fid, '%d\n', outlier_count_L{i}(j));
`%
`%         end
`%
`%     end
`%
`% end
`%
`% fclose(fid);
`%
`%
`% % write coordinates of underrepresented regions (==0) to file
`% fid=fopen('underrepresentedregion_0.txt', 'wt');
`%
`%
`% for i=1:length(chrname)-1
`%     outlier_0=find(outlier_count_L{i}==0);
`%     outlier_region_0=outlier_region_L{i}(outlier_0);
`%     outlier_region_0_up=outlier_region_0+50000;
`%
`%     outlier_count_0=outlier_count_L{i}(outlier_0);
`%     for j=1:length(outlier_region_0)
`%         if outlier_region_0(j)<chrlength(i)
`%
`%             if outlier_region_0_up(j)>=chrlength(i)
`%                 outlier_region_0_up(j)=chrlength(i)-1;
`%
`%             end
`%
`%             fprintf(fid, '%s\t',chrname{i}(1:(end-1)));
`%             fprintf(fid, '%d\t', outlier_region_0(j));
`%             fprintf(fid, '%d\t', outlier_region_0_up(j));
`%             fprintf(fid, '%d\n', outlier_count_0(j));
`%
`%         end
`%
`%     end
`%
`% end
`%
`% fclose(fid);
`%
`% % write coordinates of overrepresented regions to file
`% fid=fopen('overrepresentedregion.txt', 'wt');
`%
`%
`%
`% for i=1:length(chrname)
`%
`%     outlier_region_up=outlier_region_U{i}+50000;
`%
`%     for j=1:length(outlier_region_U{i})
`%         if outlier_region_U{i}(j)<chrlength(i)
`%             if outlier_region_up(j)>=chrlength(i)
`%                 outlier_region_up(j)=chrlength(i)-1;
`%
`%             end
`%             fprintf(fid, '%s\t',chrname{i}(1:(end-1)));
`%             fprintf(fid, '%d\t', outlier_region_U{i}(j));
`%             fprintf(fid, '%d\t', oulier_region_up(j));
`%             fprintf(fid, '%d\n', outlier_count_U{i}(j));
`%
`%         end
`%
`%     end
`%
`% end
`%
`% fclose(fid);
`%
`%
`% % find out the number of identical reads and their locations
`% unique_read=cell(24,1);
`% nonunique_read=cell(24,1);
`% nonunique_read_count=cell(24,1);
`%
`% fid=fopen('repetitive_reads.txt', 'wt');
`%
`% for i=1:length(chrname)
`%     filename=sprintf('read_%s_coord',chrname{i}(1:(end-1)));
`%     load (filename);
`%     unique_read{i}=unique(read_chr);
`%     n=histc(read_chr, unique_read{i});
`%     nonunique_index=find(n~=1);
`%     nonunique_read{i}=unique_read{i}(nonunique_index);
`%     nonunique_read_count{i}=n(nonunique_index);
`%     [nonunique_read_count{i} sortid]=sort(nonunique_read_count{i});
`%     nonunique_read{i}=nonunique_read{i}(sortid);
`%     for j=1:length(nonunique_read{i})
`%         fprintf(fid, '%s\t', chrname{i}(1:(end-1)));
`%         fprintf(fid, '%d\t', nonunique_read{i}(j));
`%         fprintf(fid, '%d\n', nonunique_read_count{i}(j));
`%
`%     end
`%
`%filename=sprintf('repetitive_read_%s',chrname{i}(1:(end-1)));
`%
`%save(filename, 'nonunique_read', 'nonunique_read_count');
`% end
`%
`%
`% fclose(fid);
`%
`
`
`
`% look at the distribution of reads across each chromosome
`% ignore golden path
`
`% Chromosome labels: chr1..chr22, then X and Y (24 entries). Every label ends
`% in delimiter text; the code that follows removes the final character with
`% chrname{i}(1:(end-1)) before using a label in file names and plot titles.
`% NOTE(review): the quote characters and the spacing inside them are restored
`% to match the first listing in this document; confirm them against the
`% names of the coordinate files on disk.
`chrname={'chr1, ', 'chr2, ', 'chr3, ', 'chr4, ', 'chr5, ', 'chr6, ', ...
`    'chr7, ', 'chr8, ', 'chr9, ', 'chr10, ', 'chr11, ', 'chr12, ', ...
`    'chr13, ', 'chr14, ', 'chr15, ', 'chr16, ', 'chr17, ', 'chr18, ', ...
`    'chr19, ', 'chr20, ', 'chr21, ', 'chr22, ', 'chrX, ', 'chrY, '};
`
`% Chromosome lengths as a 24x1 column, indexed in the same order as chrname.
`chrlength=[247249719   % chr1
`    242951149          % chr2
`    199501827          % chr3
`    191273063          % chr4
`    180857866          % chr5
`    170899992          % chr6
`    158821424          % chr7
`    146274826          % chr8
`    140273252          % chr9
`    135374737          % chr10
`    134452384          % chr11
`    132349534          % chr12
`    114142980          % chr13
`    106368585          % chr14
`    100338915          % chr15
`    88827254           % chr16
`    78774742           % chr17
`    76117153           % chr18
`    63811651           % chr19
`    62435964           % chr20
`    46944323           % chr21
`    49691432           % chr22
`    154913754          % chrX
`    57772954];         % chrY
`
`%load gapregions
`
`%noreadregioncount=cell(24,1);
`%noreadregion=cell(24,1);
`
`% readcount{i} receives the reads-per-bin vector of chromosome i.
`readcount=cell(24,1);
`%X=cell(24,1);
`
`for i=1:length(chrname)
`    % NOTE(review): chr_coord is never assigned in this script, so it is
`    % presumably a variable stored in the loaded file -- confirm.
`    filename=sprintf('%s_coord',chrname{i}(1:(end-1)));
`    load(filename);
`
`    % Count reads in consecutive 20000-wide bins over the whole chromosome;
`    % gap regions are not excluded in this variant (gapregions is not loaded).
`    bin=[0:20000:chrlength(i)];
`    readcount{i}=histc(chr_coord, bin);
`
`    % A new figure window is opened for every chromosome.
`    figure, bar( bin, readcount{i});
`
`    %bin{i}=[1:50000:gapbegin{i}(1)];
`    %
`
`    %X{i}=[1:50000:chrlength(i)];
`    %readcount{i}=histc(read_chr, X{i});
`    %distrib=histc(read_chr, X);
`    %noread=find(distrib==0);
`    %noreadcoord=X(noread);
`    %Y=[1:50000:chrlength(i)];
`    %[noreadregioncount{i} noreadregion{i}]=hist(noreadcoord,Y);
`    %figure, hist(noreadcoord,Y);
`    title(chrname{i}(1:(end-1)));
`    xlabel('chr coordinate');
`    ylabel('frequency of reads');
`
`end
`
`%for i=1:length(chrname)
`%     tolerance=3*sqrt(median(readcount{i}));
`%     [b c]=sort(readcount{i});
`%     a=bin{i}(c);
`%     outlier_region_U{i}=a(find(b>median(b)+tolerance));
`%     outlier_region_L{i}=a(find(b<median(b)-tolerance));
`%     outlier_count_L{i}=b(find(b<median(b)-tolerance));
`%     outlier_count_U{i}=b(find(b>median(b)+tolerance));
`%     mostoutlierU_count{i}=flipud(outlier_count_U{i}((end-9):end));
`%     mostoutlierU_region{i}=flipud(outlier_region_U{i}((end-9):end)');
`% end
`%
`%% save outlierU outlier_region_U outlier_count_U mostoutlierU_count mostoutlierU_region
`%% save outlierl outlier_region_L outlier_count_L
`%
`%
`% % write coordinates of underrepresented regions to file
`% fid=fopen('underrepresentedregion.txt', 'wt');
`%
`%
`% for i=1:length(chrname)-1
`%
`%     outlier_region_up=outlier_region_L{i}+50000;
`%
`%     for j=1:length(outlier_region_up)
`%         if outlier_region_L{i}(j)<chrlength(i)
`%
`%             if outlier_region_up(j)>=chrlength(i)
`%                 outlier_region_up(j)=chrlength(i)-1;
`%
`%             end
`%
`%             fprintf(fid, '%s\t',chrname{i}(1:(end-1)));
`%             fprintf(fid, '%d\t', outlier_region_L{i}(j));
`%             fprintf(fid, '%d\t', outlier_region_up(j));
`%             fprintf(fid, '%d\n', outlier_count_L{i}(j));
`%
`%         end
`%
`%     end
`%
`% end
`%
`% fclose(fid);
`%
`%
`% % write coordinates of underrepresented regions (==0) to file
`% fid=fopen('underrepresentedregion_0.txt', 'wt');
`%
`%
`%for i=1:length(chrname)-1
`%     outlier_0=find(outlier_count_L{i}==0);
`%     outlier_region_0=outlier_region_L{i}(outlier_0);
`%     outlier_region_0_up=outlier_region_0+50000;
`%
`%     outlier_count_0=outlier_count_L{i}(outlier_0);
`%     for j=1:length(outlier_region_0)
`%         if outlier_region_0(j)<chrlength(i)
`%
`%             if outlier_region_0_up(j)>=chrlength(i)
`%                 outlier_region_0_up(j)=chrlength(i)-1;
`%
`%             end
`%
`%             fprintf(fid, '%s\t',chrname{i}(1:(end-1)));
`%             fprintf(fid, '%d\t', outlier_region_0(j));
`%             fprintf(fid, '%d\t', outlier_region_0_up(j));
`%             fprintf(fid, '%d\n', outlier_count_0(j));
`%
`%         end
`%
`%     end
`%
`% end
`%
`% fclose(fid);
`%
`% % write coordinates of overrepresented regions to file
`% fid=fopen('overrepresentedregion.txt', 'wt');
`%
`%
`%
`% for i=1:length(chrname)
`%
`%     outlier_region_up=outlier_region_U{i}+50000;
`%
`%     for j=1:length(outlier_region_U{i})
`%         if outlier_region_U{i}(j)<chrlength(i)
`%             if outlier_region_up(j)>=chrlength(i)
`%                 outlier_region_up(j)=chrlength(i)-1;
`%
`%             end
`%             fprintf(fid, '%s\t',chrname{i}(1:(end-1)));
`%             fprintf(fid, '%d\t', outlier_region_U{i}(j));
`%             fprintf(fid, '%d\t', oulier_region_up(j));
`%             fprintf(fid, '%d\n', outlier_count_U{i}(j));
`%
`%         end
`%
`%     end
`%
`% end
`%
`% fclose(fid);
`%
`%
`% % find out the number of identical reads and their locations
`% unique_read=cell(24,1);
`% nonunique_read=cell(24,1);
`% nonunique_read_count=cell(24,1);
`%
`% fid=fopen('repetitive_reads.txt', 'wt');
`%
`%for i=1:length(chrname)
`%     filename=sprintf('read_%s_coord',chrname{i}(1:(end-1)));
`%     load (filename);
`%     unique_read{i}=unique(read_chr);
`%     n=histc(read_chr, unique_read{i});
`%     nonunique_index=find(n~=1);
`%     nonunique_read{i}=unique_read{i}(nonunique_index);
`%     nonunique_read_count{i}=n(nonunique_index);
`%     [nonunique_read_count{i} sortid]=sort(nonunique_read_count{i});
`%     nonunique_read{i}=nonunique_read{i}(sortid);
`%     for j=1:length(nonunique_read{i})
`%         fprintf(fid, '%s\t', chrname{i}(1:(end-1)));
`%         fprintf(fid, '%d\t', nonunique_read{i}(j));
`%         fprintf(fid, '%d\n', nonunique_read_count{i}(j));
`%
`%     end
`%
`%filename=sprintf('repetitive_read_%s',chrname{i}(1:(end-1)));
`%
`%save(filename, 'nonunique_read', 'nonunique_read_count');
`% end
`%
`%
`% fclose(fid);
`% fclose(fid);
`%
`%
`
`
`
`% look at the distribution of reads across each chromosome
`% ignore golden path
`
`% Chromosome labels: chr1..chr22, then X and Y (24 entries). Every label ends
`% in delimiter text; the code that follows removes the final character with
`% chrname{i}(1:(end-1)) before using a label in file names and output rows.
`% NOTE(review): the quote characters and the spacing inside them are restored
`% to match the first listing in this document; confirm them against the
`% names of the coordinate files on disk.
`chrname={'chr1, ', 'chr2, ', 'chr3, ', 'chr4, ', 'chr5, ', 'chr6, ', ...
`    'chr7, ', 'chr8, ', 'chr9, ', 'chr10, ', 'chr11, ', 'chr12, ', ...
`    'chr13, ', 'chr14, ', 'chr15, ', 'chr16, ', 'chr17, ', 'chr18, ', ...
`    'chr19, ', 'chr20, ', 'chr21, ', 'chr22, ', 'chrX, ', 'chrY, '};
`
`% Chromosome lengths as a 24x1 column, indexed in the same order as chrname.
`chrlength=[247249719   % chr1
`    242951149          % chr2
`    199501827          % chr3
`    191273063          % chr4
`    180857866          % chr5
`    170899992          % chr6
`    158821424          % chr7
`    146274826          % chr8
`    140273252          % chr9
`    135374737          % chr10
`    134452384          % chr11
`    132349534          % chr12
`    114142980          % chr13
`    106368585          % chr14
`    100338915          % chr15
`    88827254           % chr16
`    78774742           % chr17
`    76117153           % chr18
`    63811651           % chr19
`    62435964           % chr20
`    46944323           % chr21
`    49691432           % chr22
`    154913754          % chrX
`    57772954];         % chrY
`
`% gapbegin and gapend are indexed below as per-chromosome cell arrays.
`% NOTE(review): presumably both are stored in gapregions.mat -- confirm.
`load gapregions
`
`%noreadregioncount=cell(24,1);
`%noreadregion=cell(24,1);
`
`% readcount{i}: reads per bin for chromosome i; bin{i}: the matching bin edges.
`readcount=cell(24,1);
`% bin is initialised as a cell so that bin{i}=... below cannot fail when a
`% numeric variable named bin is already present in the workspace.
`bin=cell(length(chrname),1);
`%X=cell(24,1);
`
`for i=1:length(chrname)
`    % NOTE(review): read_chr is never assigned in this script, so it is
`    % presumably a variable stored in the loaded file -- confirm.
`    filename=sprintf('read_%s_coord',chrname{i}(1:(end-1)));
`    load (filename) ;
`
`    % Edges every 50000 from position 1 up to the first gap ...
`    bin{i}=[1:50000:gapbegin{i}(1)];
`    % ... then from the end of each gap up to the start of the next one ...
`    for j=1:(length(gapbegin{i})-1)
`        bin_add=[gapend{i}(j):50000:gapbegin{i}(j+1)];
`        bin{i}=[bin{i} bin_add];
`
`    end
`    % ... and finally from the end of the last gap to the chromosome end.
`    bin_add=[gapend{i}(end):50000:chrlength(i)];
`    bin{i}=[bin{i} bin_add];
`
`    readcount{i}=histc(read_chr, bin{i});
`
`    %X{i}=[1:50000:chrlength(i)];
`    %readcount{i}=histc(read_chr, X{i});
`    %distrib=histc(read_chr, X);
`    %noread=find(distrib==0);
`    %noreadcoord=X(noread);
`    %Y=[1:50000:chrlength(i)];
`    %[noreadregioncount{i} noreadregion{i}]=hist(noreadcoord,Y);
`    %figure, hist(noreadcoord,Y);
`    %title(chrname{i}(1:(end-1)));
`    %xlabel('chr coordinate');
`    %ylabel('frequency of reads');
`
`end
`
`% Flag, per chromosome, the bins whose read count lies more than
`% 3*sqrt(median count) above or below the median count.
`for i=1:length(chrname)
`    tolerance=3*sqrt(median(readcount{i}));
`    % b: counts in ascending order, c: their original positions,
`    % a: the bin edges rearranged into the same order.
`    [b c]=sort(readcount{i});
`    a=bin{i}(c);
`    outlier_region_U{i}=a(find(b>median(b)+tolerance));
`    outlier_region_L{i}=a(find(b<median(b)-tolerance));
`    outlier_count_L{i}=b(find(b<median(b)-tolerance));
`    outlier_count_U{i}=b(find(b>median(b)+tolerance));
`    % Last (largest) ten entries of the ascending over-represented lists,
`    % passed through flipud. max(1,...) keeps the start index valid when a
`    % chromosome has fewer than ten such bins.
`    mostoutlierU_count{i}=flipud(outlier_count_U{i}(max(1,end-9):end));
`    mostoutlierU_region{i}=flipud(outlier_region_U{i}(max(1,end-9):end)');
`
`end
`
`% save outlierU outlier_region_U outlier_count_U mostoutlierU_count mostoutlierU_region
`% save outlierl outlier_region_L outlier_count_L
`
`% write coordinates of underrepresented regions to file
`% Each row is tab-separated: chromosome label, region start, region end,
`% read count. The loop stops at length(chrname)-1, so the last entry of
`% chrname is not written.
`fid=fopen('underrepresentedregion.txt', 'wt');
`
`for i=1:length(chrname)-1
`
`    % Region end = region start + 50000.
`    outlier_region_up=outlier_region_L{i}+50000;
`
`    for j=1:length(outlier_region_up)
`        % Only regions that start inside the chromosome are reported.
`        if outlier_region_L{i}(j)<chrlength(i)
`
`            % Keep the region end inside the chromosome.
`            if outlier_region_up(j)>=chrlength(i)
`                outlier_region_up(j)=chrlength(i)-1;
`
`            end
`
`            fprintf(fid, '%s\t',chrname{i}(1:(end-1)));
`            fprintf(fid, '%d\t', outlier_region_L{i}(j));
`            fprintf(fid, '%d\t', outlier_region_up(j));
`            fprintf(fid, '%d\n', outlier_count_L{i}(j));
`
`        end
`
`    end
`
`end
`
`fclose(fid);
`
`% write coordinates of underrepresented regions (==0) to file
`% Rows are tab-separated: chromosome label, region start, region end, count.
`fid=fopen('underrepresentedregion_0.txt', 'wt');
`
`for i=1:(length(chrname)-1)
`    % Pick the under-represented bins whose read count is exactly zero.
`    outlier_0=find(outlier_count_L{i}==0);
`    outlier_count_0=outlier_count_L{i}(outlier_0);
`    outlier_region_0=outlier_region_L{i}(outlier_0);
`    outlier_region_0_up=outlier_region_0+50000;
`
`    for j=1:length(outlier_region_0)
`        % Skip any region that does not start inside the chromosome.
`        if ~(outlier_region_0(j)<chrlength(i))
`            continue
`        end
`
`        % Pull the region end back inside the chromosome when it overshoots.
`        if outlier_region_0_up(j)>=chrlength(i)
`            outlier_region_0_up(j)=chrlength(i)-1;
`        end
`
`        fprintf(fid, '%s\t',chrname{i}(1:(end-1)));
`        fprintf(fid, '%d\t', outlier_region_0(j));
`        fprintf(fid, '%d\t', outlier_region_0_up(j));
`        fprintf(fid, '%d\n', outlier_count_0(j));
`    end
`end
`
`fclose(fid);
`
`% write coordinates of overrepresented regions to file
`% Each row is tab-separated: chromosome label, region start, region end,
`% read count.
`fid=fopen('overrepresentedregion.txt', 'wt');
`
`for i=1:length(chrname)
`
`    % Region end = region start + 50000.
`    outlier_region_up=outlier_region_U{i}+50000;
`
`    for j=1:length(outlier_region_U{i})
`        % Only regions that start inside the chromosome are reported.
`        if outlier_region_U{i}(j)<chrlength(i)
`            % Keep the region end inside the chromosome.
`            if outlier_region_up(j)>=chrlength(i)
`                outlier_region_up(j)=chrlength(i)-1;
`
`            end
`            fprintf(fid, '%s\t',chrname{i}(1:(end-1)));
`            fprintf(fid, '%d\t', outlier_region_U{i}(j));
`            % The region end is outlier_region_up; the misspelled name
`            % oulier_region_up is not defined anywhere and stopped the
`            % script at the first row.
`            fprintf(fid, '%d\t', outlier_region_up(j));
`            fprintf(fid, '%d\n', outlier_count_U{i}(j));
`
`        end
`
`    end
`
`end
`
`fclose(fid);
`
`% find out the number of identical reads and their locations
`% Rows written to repetitive_reads.txt are tab-separated: chromosome label,
`% read coordinate, number of reads at that coordinate.
`unique_read=cell(24,1);
`nonunique_read=cell(24,1);
`nonunique_read_count=cell(24,1);
`
`fid=fopen('repetitive_reads.txt', 'wt');
`
`for i=1:length(chrname)
`    % NOTE(review): read_chr is never assigned in this script, so it is
`    % presumably a variable stored in the loaded file -- confirm.
`    filename=sprintf('read_%s_coord',chrname{i}(1:(end-1)));
`    load (filename);
`
`    % Distinct coordinates, and the number of reads found at each of them
`    % (the distinct values themselves serve as the histc bin edges).
`    unique_read{i}=unique(read_chr);
`    n=histc(read_chr, unique_read{i});
`
`    % Keep the coordinates whose count is not 1, in ascending order of count.
`    nonunique_index=find(n~=1);
`    nonunique_read{i}=unique_read{i}(nonunique_index);
`    nonunique_read_count{i}=n(nonunique_index);
`    [nonunique_read_count{i} sortid]=sort(nonunique_read_count{i});
`    nonunique_read{i}=nonunique_read{i}(sortid);
`
`    for j=1:length(nonunique_read{i})
`        fprintf(fid, '%s\t', chrname{i}(1:(end-1)));
`        fprintf(fid, '%d\t', nonunique_read{i}(j));
`        fprintf(fid, '%d\n', nonunique_read_count{i}(j));
`
`    end
`
`end
`
`%filename=sprintf('repetitive_read_%s',chrname{i}(1:(end-1)));
`%save(filename, 'nonunique_read', 'nonunique_read_count');
`
`% Close the file exactly once: calling fclose again on an identifier that
`% is already closed raises an error.
`fclose(fid);
`
`