-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
towards wobbling long-period tandems
- Loading branch information
Showing
5 changed files
with
189 additions
and
19 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
package de.mpi_cbg.revant.apps; | ||
|
||
import de.mpi_cbg.revant.util.IO; | ||
import java.io.*; | ||
|
||
/** | ||
* Like $Wobble.java$. | ||
*/ | ||
public class WobbleLongPeriod { | ||
|
||
public static void main(String[] args) throws IOException { | ||
final String TRANSLATED_READS_CHARACTERS_FILE = args[0]; // Of a chunk of reads | ||
final int WOBBLE_LENGTH = Integer.parseInt(args[1]); | ||
final String ALPHABET_FILE_OLD = args[2]; // Of all reads | ||
final String ALPHABET_FILE_NEW = args[3]; // Of all reads | ||
final String ALPHABET_FILE_OLD2NEW = args[4]; // Of all reads | ||
final String REPEAT_LENGTHS_FILE = args[5]; | ||
final int N_REPEATS = Integer.parseInt(args[6]); | ||
final String TANDEMS_FILE = args[7]; // Of a chunk of reads. Non-periodic only. | ||
final String OUTPUT_FILE = args[8]; // Of a chunk of reads | ||
|
||
int i; | ||
int nBlocks, lastUnique_new, lastPeriodic_new, lastAlphabet_new; | ||
String str1, str2; | ||
BufferedReader br1, br2; | ||
BufferedWriter bw; | ||
int[] old2new, tmpArray1, tmpArray2, tmpArray3; | ||
RepeatAlphabet.Character[] alphabet_new; | ||
|
||
RepeatAlphabet.loadRepeatLengths(REPEAT_LENGTHS_FILE,N_REPEATS); | ||
RepeatAlphabet.deserializeAlphabet(ALPHABET_FILE_NEW,2); | ||
alphabet_new=RepeatAlphabet.alphabet; lastUnique_new=RepeatAlphabet.lastUnique; lastPeriodic_new=RepeatAlphabet.lastPeriodic; lastAlphabet_new=RepeatAlphabet.lastAlphabet; | ||
RepeatAlphabet.deserializeAlphabet(ALPHABET_FILE_OLD,2); | ||
old2new = new int[lastAlphabet_new+1]; | ||
br1 = new BufferedReader(new FileReader(ALPHABET_FILE_OLD2NEW)); | ||
for (i=0; i<=RepeatAlphabet.lastAlphabet; i++) old2new[i]=Integer.parseInt(br1.readLine()); | ||
br1.close(); | ||
tmpArray1 = new int[100]; // Arbitrary | ||
tmpArray2 = new int[RepeatAlphabet.lastAlphabet+1]; | ||
tmpArray3 = new int[] {0,0}; | ||
br1 = new BufferedReader(new FileReader(TRANSLATED_READS_CHARACTERS_FILE)); | ||
br2 = new BufferedReader(new FileReader(TANDEMS_FILE)); | ||
bw = new BufferedWriter(new FileWriter(OUTPUT_FILE)); | ||
str1=br1.readLine(); str2=br2.readLine(); | ||
while (str1!=null) { | ||
nBlocks=1+((str1.length()+1)>>1); // Loose upper bound | ||
if (tmpArray1.length<nBlocks) tmpArray1 = new int[nBlocks]; | ||
RepeatAlphabet.wobble_longPeriod(str1,str2,WOBBLE_LENGTH,IO.quantum,old2new,alphabet_new,lastUnique_new,lastPeriodic_new,lastAlphabet_new,bw,tmpArray1,tmpArray2,tmpArray3); | ||
str1=br1.readLine(); str2=br2.readLine(); | ||
} | ||
br1.close(); br2.close(); bw.close(); | ||
System.err.println("Applied wobbling to "+tmpArray3[0]+" blocks out of "+tmpArray3[1]+" total ("+((100.0*tmpArray3[0])/tmpArray3[1])+"%)"); | ||
} | ||
|
||
} |
58 changes: 58 additions & 0 deletions
58
src/de/mpi_cbg/revant/apps/WobbleLongPeriodCreateAlphabet1.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
package de.mpi_cbg.revant.apps; | ||
|
||
import de.mpi_cbg.revant.util.Math; | ||
import de.mpi_cbg.revant.util.IO; | ||
import java.io.*; | ||
|
||
/** | ||
* Marks all characters of the alphabet that are adjacent to a long-period tandem or that | ||
* belong to a long-period tandem in some translation, and prints the corresponding | ||
* bitvector in output. | ||
* | ||
* This is designed to work on a chunk of reads. | ||
*/ | ||
public class WobbleLongPeriodCreateAlphabet1 { | ||
|
||
public static void main(String[] args) throws IOException { | ||
final String ALPHABET_FILE = args[0]; // Of all reads | ||
final String TRANSLATED_READS_CHARACTERS_FILE = args[1]; // Of a chunk of reads | ||
final String TANDEMS_FILE = args[2]; // Of a chunk of reads. Non-periodic only. | ||
final String READ_LENGTHS_FILE = args[3]; // Of a chunk of reads | ||
final String OUTPUT_FILE = args[4]; | ||
|
||
int i; | ||
int nBlocks, nFlags; | ||
String str1, str2, str3; | ||
RepeatAlphabet.Character tmpCharacter; | ||
BufferedReader br1, br2, br3; | ||
BufferedWriter bw; | ||
boolean[] flags; | ||
int[] tmpArray; | ||
|
||
tmpCharacter = new RepeatAlphabet.Character(); | ||
RepeatAlphabet.deserializeAlphabet(ALPHABET_FILE,2); | ||
flags = new boolean[RepeatAlphabet.lastAlphabet+1]; | ||
Math.set(flags,RepeatAlphabet.lastAlphabet,false); | ||
if (RepeatAlphabet.lastAlphabet>RepeatAlphabet.lastPeriodic) { | ||
tmpArray = new int[100]; // Arbitrary | ||
br1 = new BufferedReader(new FileReader(TRANSLATED_READS_CHARACTERS_FILE)); | ||
br2 = new BufferedReader(new FileReader(TANDEMS_FILE)); | ||
br3 = new BufferedReader(new FileReader(READ_LENGTHS_FILE)); | ||
str1=br1.readLine(); str2=br2.readLine(); str3=br3.readLine(); | ||
i=0; | ||
while (str1!=null) { | ||
nBlocks=1+((str1.length()+1)>>1); // Loose upper bound | ||
if (tmpArray.length<nBlocks) tmpArray = new int[nBlocks]; | ||
RepeatAlphabet.wobble_longPeriod_markAlphabet(str1,str2,Integer.parseInt(str3),flags,tmpCharacter,tmpArray); | ||
i++; | ||
if (i%10000==0) System.err.println("Processed "+i+" reads"); | ||
str1=br1.readLine(); str2=br2.readLine(); str3=br3.readLine(); | ||
} | ||
br1.close(); br2.close(); br3.close(); | ||
} | ||
bw = new BufferedWriter(new FileWriter(OUTPUT_FILE)); | ||
for (i=0; i<=RepeatAlphabet.lastAlphabet; i++) bw.write(flags[i]?"1\n":"0\n"); | ||
bw.close(); | ||
} | ||
|
||
} |