Skip to content

Commit

Permalink
fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
fcunial committed Aug 21, 2023
1 parent 679709a commit e558e16
Show file tree
Hide file tree
Showing 10 changed files with 176 additions and 188 deletions.
4 changes: 2 additions & 2 deletions scripts/6-repeatAlphabet/1-buildAlphabet.sh
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ done


echo "Trying to fix tandem spacers if needed..."
TANDEM_SPACERS_ITERATIONS="1"
TANDEM_SPACERS_ITERATIONS="3"
NONREPETITIVE_BLOCKS_MODE="2"
CONCATENATE_THRESHOLD="200"
LONG_SPACER_LENGTH=$(( ${MIN_ALIGNMENT_LENGTH} * 20 )) # Arbitrary
Expand Down Expand Up @@ -387,7 +387,7 @@ if [ ${WOBBLE_LENGTH} -ne 0 ]; then
TO=$(( ${N_THREADS} - 1 ))
fi
echo "Computing tandem track..."
for THREAD in $(seq 0 ${TO}); do
for THREAD_ID in $(seq 0 ${TO}); do
tandemsThread ${TMPFILE_PATH}-wobble-1-${THREAD_ID}.txt ${TMPFILE_PATH}-wobble-2-${THREAD_ID}.txt ${TMPFILE_PATH}-wobble-3-lengths-${THREAD_ID}.txt ${TMPFILE_PATH}-wobble-4-${THREAD_ID}.txt &
done
wait
Expand Down
5 changes: 2 additions & 3 deletions scripts/6-repeatAlphabet/2-fixEndBlocks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,7 @@ N_HAPLOTYPES=$9
TIGHT_MODE="0"
SPANNING_BPS="150" # Bps before and after a k-mer to consider it observed in a read.
# ------------------------------------ REVANT --------------------------------------------
REVANT_LIBRARIES="${REVANT_BINARIES}/../lib"
REVANT_LIBRARIES="${REVANT_LIBRARIES}/commons-numbers-gamma-1.1.jar:${REVANT_LIBRARIES}/commons-rng-sampling-1.5.jar:${REVANT_LIBRARIES}/commons-statistics-distribution-1.0.jar"
REVANT_LIBRARIES="${REVANT_BINARIES}/../lib/*"
# ----------------------------------------------------------------------------------------

set -o pipefail; set -e; set -u
Expand Down Expand Up @@ -94,7 +93,7 @@ for K in $(seq ${MIN_K} ${MAX_K}); do
FREQUENT_KMERS_FILE="${INPUT_DIR}/frequent-k${K}.txt"
OUTPUT_FILE_HISTOGRAM="${INPUT_DIR}/histogram-k${K}.txt"
echo "Finding frequent ${K}-mers..."
java ${JAVA_RUNTIME_FLAGS} -classpath "${REVANT_BINARIES}:${REVANT_LIBRARIES}" de.mpi_cbg.revant.apps.CompactKmers ${TMPFILE_PATH}-${K}.txt ${K} ${GENOME_LENGTH} ${N_HAPLOTYPES} ${N_READS} ${AVG_READ_LENGTH} ${SPANNING_BPS} 0 ${ALPHABET_FILE} 1 10000 ${FREQUENT_KMERS_FILE} ${OUTPUT_FILE_HISTOGRAM}
java ${JAVA_RUNTIME_FLAGS} -classpath "${REVANT_BINARIES}:${REVANT_LIBRARIES}" de.mpi_cbg.revant.apps.CompactKmers ${TMPFILE_PATH}-${K}.txt ${K} ${GENOME_LENGTH} ${N_HAPLOTYPES} ${N_READS} ${AVG_READ_LENGTH} ${SPANNING_BPS} -1 0 ${ALPHABET_FILE} 1 10000 ${FREQUENT_KMERS_FILE} ${OUTPUT_FILE_HISTOGRAM}
echo "Computing $((${K}-1))-mers..."
K_MINUS_ONE_MERS_FILE="${INPUT_DIR}/kMinusOne-k${K}.txt"
java ${JAVA_RUNTIME_FLAGS} -classpath "${REVANT_BINARIES}" de.mpi_cbg.revant.apps.GetKMinusOneMers ${ALPHABET_FILE} ${FREQUENT_KMERS_FILE} ${K} ${K_MINUS_ONE_MERS_FILE}
Expand Down
8 changes: 4 additions & 4 deletions scripts/6-repeatAlphabet/3-getUniqueSubstrings.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,12 @@ DELETE_TMP_FILES=$6
IDENTITY_THRESHOLD=$7
DISTANCE_THRESHOLD=$8
CHARACTER_THRESHOLD=$9
MIN_ALIGNMENT_LENGTH=${10} # Read-repeat
UNIQUE_MODE="1" # Non-repetitive blocks are allowed in a k-mer, except at the first/last
# position of the k-mer. Usually a good choice.
SPANNING_BPS="150" # Bps before and after a k-mer to consider it observed in a read.
# ------------------------------------ REVANT --------------------------------------------
REVANT_LIBRARIES="${REVANT_BINARIES}/../lib"
REVANT_LIBRARIES="${REVANT_LIBRARIES}/commons-numbers-gamma-1.1.jar:${REVANT_LIBRARIES}/commons-rng-sampling-1.5.jar:${REVANT_LIBRARIES}/commons-statistics-distribution-1.0.jar"
REVANT_LIBRARIES="${REVANT_BINARIES}/../lib/*"
# ----------------------------------------------------------------------------------------


Expand Down Expand Up @@ -72,7 +72,7 @@ function intervalsThread() {
local LOCAL_UNIQUE_KMERS_FILE=$5
local LOCAL_K_MINUS_ONE_INTERVALS_FILE=$6
local LOCAL_INTERVALS_FILE=$7
java ${JAVA_RUNTIME_FLAGS} -classpath "${REVANT_BINARIES}:${REVANT_LIBRARIES}" de.mpi_cbg.revant.apps.GetShortestUniqueIntervals ${LOCAL_K} ${LOCAL_TRANSLATED_READS_FILE} ${LOCAL_BOUNDARIES_FILE} ${LOCAL_READ_LENGTHS_FILE} ${ALPHABET_FILE} ${LOCAL_UNIQUE_KMERS_FILE} ${N_READS} ${AVG_READ_LENGTH} ${GENOME_LENGTH} ${N_HAPLOTYPES} ${IDENTITY_THRESHOLD} ${DISTANCE_THRESHOLD} ${CHARACTER_THRESHOLD} ${LOCAL_K_MINUS_ONE_INTERVALS_FILE} ${LOCAL_INTERVALS_FILE}
java ${JAVA_RUNTIME_FLAGS} -classpath "${REVANT_BINARIES}:${REVANT_LIBRARIES}" de.mpi_cbg.revant.apps.GetShortestUniqueIntervals ${LOCAL_K} ${LOCAL_TRANSLATED_READS_FILE} ${LOCAL_BOUNDARIES_FILE} ${LOCAL_READ_LENGTHS_FILE} ${ALPHABET_FILE} ${LOCAL_UNIQUE_KMERS_FILE} ${N_READS} ${AVG_READ_LENGTH} ${GENOME_LENGTH} ${N_HAPLOTYPES} ${MIN_ALIGNMENT_LENGTH} ${IDENTITY_THRESHOLD} ${DISTANCE_THRESHOLD} ${CHARACTER_THRESHOLD} ${LOCAL_K_MINUS_ONE_INTERVALS_FILE} ${LOCAL_INTERVALS_FILE}
if [ $? -ne 0 ]; then
exit
fi
Expand Down Expand Up @@ -106,7 +106,7 @@ for K in $(seq 1 ${MAX_K}); do
UNIQUE_KMERS_FILE="${INPUT_DIR}/unique-k${K}.txt"
OUTPUT_FILE_HISTOGRAM="${INPUT_DIR}/histogram-k${K}.txt"
echo "Finding unique ${K}-mers..."
java ${JAVA_RUNTIME_FLAGS} -classpath "${REVANT_BINARIES}:${REVANT_LIBRARIES}" de.mpi_cbg.revant.apps.CompactKmers ${TMPFILE_PATH}-${K}.txt ${K} ${GENOME_LENGTH} ${N_HAPLOTYPES} ${N_READS} ${AVG_READ_LENGTH} ${SPANNING_BPS} 1 ${ALPHABET_FILE} 0 ${MAX_HISTOGRAM_COUNT} ${UNIQUE_KMERS_FILE} ${OUTPUT_FILE_HISTOGRAM}
java ${JAVA_RUNTIME_FLAGS} -classpath "${REVANT_BINARIES}:${REVANT_LIBRARIES}" de.mpi_cbg.revant.apps.CompactKmers ${TMPFILE_PATH}-${K}.txt ${K} ${GENOME_LENGTH} ${N_HAPLOTYPES} ${N_READS} ${AVG_READ_LENGTH} ${SPANNING_BPS} ${MIN_ALIGNMENT_LENGTH} 1 ${ALPHABET_FILE} 0 ${MAX_HISTOGRAM_COUNT} ${UNIQUE_KMERS_FILE} ${OUTPUT_FILE_HISTOGRAM}
echo "Updating shortest unique intervals file..."
for FILE in $(find -s ${INPUT_DIR} -name "${TMPFILE_NAME}-0-*"); do
THREAD_ID=${FILE#${INPUT_DIR}/${TMPFILE_NAME}-0-}
Expand Down
2 changes: 1 addition & 1 deletion scripts/6-repeatAlphabet/master.sh
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ fi
PERIODIC_ENDPOINTS_FIXED=$(cat ${INPUT_DIR}/buildAlphabet-tmp-return.txt)
MIN_K_FOR_DISAMBIGUATION="2"; MAX_K_FOR_DISAMBIGUATION="4"
./2-fixEndBlocks.sh ${INPUT_DIR} ${BROKEN_READS} ${LOW_QUALITY_TYPE} ${MIN_K_FOR_DISAMBIGUATION} ${MAX_K_FOR_DISAMBIGUATION} ${N_THREADS} ${DELETE_TMP_FILES} ${GENOME_LENGTH} ${N_HAPLOTYPES}
./3-getUniqueSubstrings.sh ${INPUT_DIR} ${GENOME_LENGTH} ${N_HAPLOTYPES} ${MAX_K_UNIQUE} ${N_THREADS} ${DELETE_TMP_FILES} ${IDENTITY_THRESHOLD} ${DISTANCE_THRESHOLD} ${CHARACTER_THRESHOLD}
./3-getUniqueSubstrings.sh ${INPUT_DIR} ${GENOME_LENGTH} ${N_HAPLOTYPES} ${MAX_K_UNIQUE} ${N_THREADS} ${DELETE_TMP_FILES} ${IDENTITY_THRESHOLD} ${DISTANCE_THRESHOLD} ${CHARACTER_THRESHOLD} ${MIN_ALIGNMENT_LENGTH_READ_REPEAT}
./4-filterAlignments.sh ${INPUT_DIR} ${BROKEN_READS} ${PERIODIC_ENDPOINTS_FIXED} ${MIN_ALIGNMENT_LENGTH_READ_READ} ${MIN_ALIGNMENT_LENGTH_READ_REPEAT} ${MAX_K_UNIQUE} ${ALIGNMENT_FILTERING_MODE} ${MIN_INTERSECTION_NONREPETITIVE} ${N_THREADS} ${DELETE_TMP_FILES}
READ_LENGTHS_FILE="${INPUT_DIR}/reads-lengths.txt"
N_READS=$(wc -l < ${READ_LENGTHS_FILE})
Expand Down
22 changes: 21 additions & 1 deletion src/de/mpi_cbg/revant/apps/BuildAssemblyGraph.java
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,7 @@ public static void main(String[] args) throws IOException {
componentSize = new int[nComponents];
Math.set(componentSize,nComponents-1,0);
for (i=0; i<N_READS; i++) componentSize[component[i]]++;
printHistogram(componentSize,nComponents);
j=-1;
for (i=0; i<nComponents; i++) {
if (componentSize[i]>=MIN_COMPONENT_SIZE) componentSize[++j]=i;
Expand Down Expand Up @@ -278,6 +279,7 @@ public static void main(String[] args) throws IOException {
componentSize = new int[nComponents];
Math.set(componentSize,nComponents-1,0);
for (i=0; i<N_READS; i++) componentSize[component[i]]++;
printHistogram(componentSize,nComponents);
j=-1;
for (i=0; i<nComponents; i++) {
if (componentSize[i]>=MIN_COMPONENT_SIZE) componentSize[++j]=i;
Expand Down Expand Up @@ -313,7 +315,25 @@ public static void main(String[] args) throws IOException {
for (i=0; i<nComponents; i++) { bws[i].write("}"); bws[i].close(); }
br1.close();
}



/**
 * Prints to STDERR the empirical cumulative distribution of the first $nComponents$
 * values of $componentSize$, one line per distinct value, in the format
 * $value,fraction$, where $fraction$ is the proportion of elements that are at most
 * $value$. The last line always reports the maximum value with fraction $1$.
 *
 * Remark: the procedure works on a sorted copy, so $componentSize$ is not altered.
 *
 * Remark: if $nComponents$ is not positive, only the header line is printed (there
 * is no distribution to report), rather than failing on an empty array.
 *
 * @param componentSize array whose first $nComponents$ cells are summarized;
 * @param nComponents number of cells of $componentSize$ to use.
 */
private static final void printHistogram(int[] componentSize, int nComponents) {
	int i;
	int[] tmpArray;

	System.err.println("Cumulative distribution of component size:");
	if (nComponents<=0) return;  // Nothing to summarize; avoids reading cell -1 below.
	tmpArray = new int[nComponents];
	System.arraycopy(componentSize,0,tmpArray,0,nComponents);
	Arrays.sort(tmpArray);
	for (i=1; i<nComponents; i++) {
		// At a change of value, exactly $i$ sorted elements are at most $tmpArray[i-1]$.
		if (tmpArray[i]!=tmpArray[i-1]) System.err.println(tmpArray[i-1]+","+(((double)i)/nComponents));
	}
	// The largest value is never followed by a change, so it is printed explicitly.
	System.err.println(tmpArray[nComponents-1]+",1");
}


/**
* Adds $to$ to $from$.
Expand Down
4 changes: 2 additions & 2 deletions src/de/mpi_cbg/revant/apps/CollectKmers.java
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ public static void main(String[] args) throws IOException {
else lastAvoidedInterval=-1;
RepeatAlphabet.loadBoundaries(str3);
readLength=Integer.parseInt(str4);
RepeatAlphabet.getKmers(str1,K,kmers,null,avoidedIntervals,lastAvoidedInterval,readLength,-1/*argument not used*/,-1/*argument not used*/,-1/*argument not used*/,-1/*argument not used*/,RepeatAlphabet.boundaries,-1/*argument not used*/,-1/*argument not used*/,-1.0/*argument not used*/,tmpKmer,tmpArray2,tmpArray3,tmpMap,tmpChar);
RepeatAlphabet.getKmers(str1,K,kmers,null,avoidedIntervals,lastAvoidedInterval,readLength,-1/*argument not used*/,-1/*argument not used*/,-1/*argument not used*/,-1/*argument not used*/,-1/*argument not used*/,RepeatAlphabet.boundaries,-1/*argument not used*/,-1/*argument not used*/,-1.0/*argument not used*/,tmpKmer,tmpArray2,tmpArray3,tmpMap,tmpChar);
str1=br1.readLine(); str2=INTERVALS_FILE_EXISTS?br2.readLine():null;
str3=br3.readLine(); str4=br4.readLine(); row++;
}
Expand All @@ -77,7 +77,7 @@ public static void main(String[] args) throws IOException {
kmers.keySet().toArray(keys);
if (nKmers>1) Arrays.sort(keys,0,nKmers);
bw = new BufferedWriter(new FileWriter(KMERS_FILE));
for (i=0; i<nKmers; i++) bw.write(keys[i].toString()+(RepeatAlphabet.SEPARATOR_MINOR+"")+keys[i].count+(RepeatAlphabet.SEPARATOR_MINOR+"")+keys[i].sameReadCount+"\n");
for (i=0; i<nKmers; i++) bw.write(keys[i].toString()+(RepeatAlphabet.SEPARATOR_MINOR+"")+keys[i].count+(RepeatAlphabet.SEPARATOR_MINOR+"")+keys[i].countPartial+(RepeatAlphabet.SEPARATOR_MINOR+"")+keys[i].sameReadCount+"\n");
bw.close();
}

Expand Down
51 changes: 28 additions & 23 deletions src/de/mpi_cbg/revant/apps/CompactKmers.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
*/
public class CompactKmers {
/**
* @param args 9 TRUE=keep every k-mer that passes a one-sided significance test in
* @param args 10 TRUE=keep every k-mer that passes a one-sided significance test in
* the model where it occurs on just one haplotype.
*/
public static void main(String[] args) throws IOException {
Expand All @@ -23,18 +23,20 @@ public static void main(String[] args) throws IOException {
final int N_READS = Integer.parseInt(args[4]);
final int AVG_READ_LENGTH = Integer.parseInt(args[5]);
final int SPANNING_BPS = Integer.parseInt(args[6]);
final boolean DISCARD_SAME_READ_KMERS = Integer.parseInt(args[7])==1;
final String ALPHABET_FILE = args[8];
final boolean KEEP_ALL_FREQUENT = Integer.parseInt(args[9])==1;
final int MAX_HISTOGRAM_COUNT = Integer.parseInt(args[10]);
final String OUTPUT_FILE_KMERS = args[11];
final String OUTPUT_FILE_HISTOGRAM = args[12].equalsIgnoreCase("null")?null:args[12];
final int MIN_ALIGNMENT_LENGTH = Integer.parseInt(args[7]); // Read-repeat
final boolean DISCARD_SAME_READ_KMERS = Integer.parseInt(args[8])==1;
final String ALPHABET_FILE = args[9];
final boolean KEEP_ALL_FREQUENT = Integer.parseInt(args[10])==1;
final int MAX_HISTOGRAM_COUNT = Integer.parseInt(args[11]);
final String OUTPUT_FILE_KMERS = args[12];
final String OUTPUT_FILE_HISTOGRAM = args[13].equalsIgnoreCase("null")?null:args[13];

final double SIGNIFICANCE_LEVEL = 0.05; // Conventional
final String SEPARATOR = ",";

boolean equal;
int i;
int count, previousCount, sameReadCount, previousSameReadCount;
int count, countPartial, previousCount, previousCountPartial, sameReadCount, previousSameReadCount;
String str;
BufferedReader br;
BufferedWriter bw;
Expand All @@ -58,50 +60,53 @@ public static void main(String[] args) throws IOException {
else histogram=null;
bw = new BufferedWriter(new FileWriter(OUTPUT_FILE_KMERS));
previous = new int[K];
tokens=str.split(",");
tokens=str.split(SEPARATOR);
for (i=0; i<K; i++) previous[i]=Integer.parseInt(tokens[i]);
previousCount=Integer.parseInt(tokens[K]);
previousSameReadCount=Integer.parseInt(tokens[K+1]);
previousCountPartial=Integer.parseInt(tokens[K+1]);
previousSameReadCount=Integer.parseInt(tokens[K+2]);
current = new int[K];
str=br.readLine();
while (str!=null) {
tokens=str.split(",");
tokens=str.split(SEPARATOR);
equal=true;
for (i=0; i<K; i++) {
current[i]=Integer.parseInt(tokens[i]);
if (current[i]!=previous[i]) equal=false;
}
count=Integer.parseInt(tokens[K]);
sameReadCount=Integer.parseInt(tokens[K+1]);
countPartial=Integer.parseInt(tokens[K+1]);
sameReadCount=Integer.parseInt(tokens[K+2]);
if (equal) {
previousCount+=count;
previousCountPartial+=countPartial;
previousSameReadCount=Math.max(previousSameReadCount,sameReadCount);
}
else {
kmer.set(previous,K,previousCount);
kmer.set(previous,K,previousCount,previousCountPartial,previousSameReadCount);
if ( (DISCARD_SAME_READ_KMERS?previousSameReadCount==1:true) &&
(KEEP_ALL_FREQUENT?kmer.isFrequent(K,N_READS,AVG_READ_LENGTH,SPANNING_BPS,GENOME_LENGTH,N_HAPLOTYPES,SIGNIFICANCE_LEVEL):(kmer.isUnique(K,N_READS,AVG_READ_LENGTH,SPANNING_BPS,GENOME_LENGTH,N_HAPLOTYPES,SIGNIFICANCE_LEVEL)!=-1))
(KEEP_ALL_FREQUENT?kmer.isFrequent(K,N_READS,AVG_READ_LENGTH,SPANNING_BPS,GENOME_LENGTH,N_HAPLOTYPES,SIGNIFICANCE_LEVEL):(kmer.isUnique(K,N_READS,AVG_READ_LENGTH,SPANNING_BPS,GENOME_LENGTH,N_HAPLOTYPES,MIN_ALIGNMENT_LENGTH,SIGNIFICANCE_LEVEL)!=-1))
) {
for (i=0; i<K; i++) bw.write(previous[i]+",");
bw.write(previousCount+"\n");
for (i=0; i<K; i++) bw.write(previous[i]+SEPARATOR);
bw.write(previousCount+SEPARATOR+previousCountPartial+SEPARATOR+previousSameReadCount+"\n");
}
if (OUTPUT_FILE_HISTOGRAM!=null) histogram[previousCount>MAX_HISTOGRAM_COUNT?MAX_HISTOGRAM_COUNT:previousCount]++;
if (OUTPUT_FILE_HISTOGRAM!=null) histogram[previousCount+previousCountPartial>MAX_HISTOGRAM_COUNT?MAX_HISTOGRAM_COUNT:previousCount+previousCountPartial]++;
tmpArray=previous; previous=current; current=tmpArray;
previousCount=count; previousSameReadCount=sameReadCount;
previousCount=count; previousCountPartial=countPartial; previousSameReadCount=sameReadCount;
}
str=br.readLine();
}
br.close();
kmer.set(previous,K,previousCount);
kmer.set(previous,K,previousCount,previousCountPartial,previousSameReadCount);
if ( (DISCARD_SAME_READ_KMERS?previousSameReadCount==1:true) &&
(KEEP_ALL_FREQUENT?kmer.isFrequent(K,N_READS,AVG_READ_LENGTH,SPANNING_BPS,GENOME_LENGTH,N_HAPLOTYPES,SIGNIFICANCE_LEVEL):(kmer.isUnique(K,N_READS,AVG_READ_LENGTH,SPANNING_BPS,GENOME_LENGTH,N_HAPLOTYPES,SIGNIFICANCE_LEVEL)!=-1))
(KEEP_ALL_FREQUENT?kmer.isFrequent(K,N_READS,AVG_READ_LENGTH,SPANNING_BPS,GENOME_LENGTH,N_HAPLOTYPES,SIGNIFICANCE_LEVEL):(kmer.isUnique(K,N_READS,AVG_READ_LENGTH,SPANNING_BPS,GENOME_LENGTH,N_HAPLOTYPES,MIN_ALIGNMENT_LENGTH,SIGNIFICANCE_LEVEL)!=-1))
) {
for (i=0; i<K; i++) bw.write(previous[i]+",");
bw.write(previousCount+"\n");
for (i=0; i<K; i++) bw.write(previous[i]+SEPARATOR);
bw.write(previousCount+SEPARATOR+previousCountPartial+SEPARATOR+previousSameReadCount+"\n");
}
bw.close();
if (OUTPUT_FILE_HISTOGRAM!=null) {
histogram[previousCount>MAX_HISTOGRAM_COUNT?MAX_HISTOGRAM_COUNT:previousCount]++;
histogram[previousCount+previousCountPartial>MAX_HISTOGRAM_COUNT?MAX_HISTOGRAM_COUNT:previousCount+previousCountPartial]++;
bw = new BufferedWriter(new FileWriter(OUTPUT_FILE_HISTOGRAM));
for (i=0; i<=MAX_HISTOGRAM_COUNT; i++) bw.write(i+","+histogram[i]+"\n");
bw.close();
Expand Down
28 changes: 28 additions & 0 deletions src/de/mpi_cbg/revant/apps/FixTandemSpacers1.java
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,39 @@ public static void main(String[] args) throws IOException {
RepeatAlphabet.loadReadsFully(FULLY_UNIQUE_FILE,N_FULLY_UNIQUE,FULLY_CONTAINED_FILE,N_FULLY_CONTAINED);
RepeatAlphabet.loadTandemIntervals(TANDEMS_FILE,N_READS);
RepeatAlphabet.loadTandemSpacers(NONREPETITIVE_BLOCKS_MODE);


for (int x=0; x<=RepeatAlphabet.lastSpacer; x++) {
if (RepeatAlphabet.spacers[x].read==42) System.err.println("VITTU> 1 "+RepeatAlphabet.spacers[x]);
}

RepeatAlphabet.loadTandemSpacers_blocks(READ_READ_ALIGNMENTS_FILE,DISTANCE_THRESHOLD,LONG_SPACER_LENGTH,NONREPETITIVE_BLOCKS_MODE,tmpArray);


for (int x=0; x<=RepeatAlphabet.lastSpacer; x++) {
if (RepeatAlphabet.spacers[x].read==42) System.err.println("VITTU> 2 "+RepeatAlphabet.spacers[x]);
}


if (RepeatAlphabet.lastSpacer==-1) { System.out.println("1"); return; }
RepeatAlphabet.loadFullyContainedTranslation(TRANSLATED_READS_CHARACTERS_FILE,N_FULLY_CONTAINED);
if (RepeatAlphabet.loadTandemSpacerNeighbors(READ_READ_ALIGNMENTS_FILE,NONREPETITIVE_BLOCKS_MODE,tmpArray)==0) { System.out.println("2"); return; }


for (int x=0; x<=RepeatAlphabet.lastSpacer; x++) {
if (RepeatAlphabet.spacers[x].read==42) System.err.println("VITTU> 3 "+RepeatAlphabet.spacers[x]+" lastSpacerNeighbor="+RepeatAlphabet.lastSpacerNeighbor[x]);
}



if (!RepeatAlphabet.propagateSolutions(DISTANCE_THRESHOLD_CONSISTENCY)) { System.out.println("3"); return; }

for (int x=0; x<=RepeatAlphabet.lastSpacer; x++) {
if (RepeatAlphabet.spacers[x].read==42) System.err.println("VITTU> 4 "+RepeatAlphabet.spacers[x]);
}



RepeatAlphabet.serializeSpacers(OUTPUT_FILE);
System.out.println("0");
}
Expand Down
13 changes: 7 additions & 6 deletions src/de/mpi_cbg/revant/apps/GetShortestUniqueIntervals.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,12 @@ public static void main(String[] args) throws IOException {
final int AVG_READ_LENGTH = Integer.parseInt(args[7]);
final long GENOME_LENGTH = Long.parseLong(args[8]); // One haplotype
final int N_HAPLOTYPES = Integer.parseInt(args[9]);
final int IDENTITY_THRESHOLD = Integer.parseInt(args[10]);
final int DISTANCE_THRESHOLD = Integer.parseInt(args[11]);
final double CHARACTER_FRACTION = Double.parseDouble(args[12]);
final String OLD_INTERVALS_FILE = args[13]; // NULL to discard it
final String NEW_INTERVALS_FILE = args[14]; // Output
final int MIN_ALIGNMENT_LENGTH = Integer.parseInt(args[10]); // Read-repeat
final int IDENTITY_THRESHOLD = Integer.parseInt(args[11]);
final int DISTANCE_THRESHOLD = Integer.parseInt(args[12]);
final double CHARACTER_FRACTION = Double.parseDouble(args[13]);
final String OLD_INTERVALS_FILE = args[14]; // NULL to discard it
final String NEW_INTERVALS_FILE = args[15]; // Output

boolean OLD_INTERVALS_FILE_EXISTS = !OLD_INTERVALS_FILE.equalsIgnoreCase("null");

Expand Down Expand Up @@ -94,7 +95,7 @@ public static void main(String[] args) throws IOException {
else lastUniqueInterval=-1;
RepeatAlphabet.loadBoundaries(str3);
readLength=Integer.parseInt(str4);
lastUniqueInterval=RepeatAlphabet.getKmers(str1,K,null,kmers,uniqueIntervals,lastUniqueInterval,readLength,N_READS,AVG_READ_LENGTH,GENOME_LENGTH,N_HAPLOTYPES,RepeatAlphabet.boundaries,IDENTITY_THRESHOLD,DISTANCE_THRESHOLD,CHARACTER_FRACTION,tmpKmer,tmpArray2,tmpArray3,null,tmpChar);
lastUniqueInterval=RepeatAlphabet.getKmers(str1,K,null,kmers,uniqueIntervals,lastUniqueInterval,readLength,N_READS,AVG_READ_LENGTH,GENOME_LENGTH,N_HAPLOTYPES,MIN_ALIGNMENT_LENGTH,RepeatAlphabet.boundaries,IDENTITY_THRESHOLD,DISTANCE_THRESHOLD,CHARACTER_FRACTION,tmpKmer,tmpArray2,tmpArray3,null,tmpChar);
if (lastUniqueInterval>0) {
nPairs=(lastUniqueInterval+1)/3;
if (pairs.length<nPairs) {
Expand Down
Loading

0 comments on commit e558e16

Please sign in to comment.