-
Notifications
You must be signed in to change notification settings - Fork 587
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
author Andrea Haessly <[email protected]> 1582141271 -0500 committer Andrea Haessly <[email protected]> 1611852899 -0500 parent 6d43103 author Andrea Haessly <[email protected]> 1582141271 -0500 committer Andrea Haessly <[email protected]> 1611852778 -0500 initial impl tested shift and chain make offset optional fix format of chain file make offset optional fix format of chain file add interval output
- Loading branch information
Showing
1 changed file
with
210 additions
and
0 deletions.
There are no files selected for viewing
210 changes: 210 additions & 0 deletions
210
src/main/java/org/broadinstitute/hellbender/tools/walkers/fasta/ShiftFasta.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,210 @@ | ||
package org.broadinstitute.hellbender.tools.walkers.fasta; | ||
|
||
import htsjdk.samtools.SAMSequenceDictionary; | ||
import htsjdk.samtools.SAMSequenceRecord; | ||
import htsjdk.samtools.reference.FastaReferenceWriter; | ||
import htsjdk.samtools.reference.FastaReferenceWriterBuilder; | ||
import org.broadinstitute.barclay.argparser.Argument; | ||
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; | ||
import org.broadinstitute.barclay.help.DocumentedFeature; | ||
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; | ||
import org.broadinstitute.hellbender.engine.GATKTool; | ||
import org.broadinstitute.hellbender.engine.ReferenceDataSource; | ||
import org.broadinstitute.hellbender.exceptions.UserException; | ||
import org.broadinstitute.hellbender.utils.SimpleInterval; | ||
import org.broadinstitute.hellbender.utils.io.IOUtils; | ||
import picard.cmdline.programgroups.ReferenceProgramGroup; | ||
|
||
import java.io.FileWriter; | ||
import java.io.IOException; | ||
import java.nio.file.Path; | ||
import java.util.Arrays; | ||
import java.util.List; | ||
import java.util.ListIterator; | ||
|
||
/** | ||
* Create a fasta with the bases shifted by offset | ||
* | ||
* delta1 = offset - 1 | ||
* delta2 = total - delta1 | ||
* | ||
* To shift forward: | ||
* if you are given a position in the regular fasta (pos_r) and want the position in the shifted fasta (pos_s): | ||
* if pos_r > delta1 => pos_s = pos_r - delta1 == pos_r - offset +1 | ||
* otherwise pos_s = pos_r + delta2 == pos_r + total - offset + 1 | ||
* | ||
* To shift back: | ||
* if you are given a position in the shifted fasta (pos_s) and want the position in the regular fasta (pos_r): | ||
* if pos_s > delta2 => pos_r = pos_s - delta2 == pos_s - total + offset - 1 | ||
* otherwise pos_r = pos_s + delta1 == pos_s + offset - 1 | ||
* | ||
*/ | ||
@DocumentedFeature | ||
@CommandLineProgramProperties( | ||
summary = "Create a new fasta starting at the shift-offset +1 position and a shift_back chain file that can be used with the Liftover tool", | ||
oneLineSummary = "Creates a shifted fasta file and shift_back file", | ||
programGroup = ReferenceProgramGroup.class | ||
) | ||
public class ShiftFasta extends GATKTool { | ||
|
||
@Argument(fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME, | ||
shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME, | ||
doc = "Path to write the output fasta to") | ||
protected String output; | ||
|
||
public static final String SHIFT_BACK_OUTPUT = "shift-back-output"; | ||
@Argument(fullName = SHIFT_BACK_OUTPUT, | ||
doc = "Path to write the shift_back file to") | ||
protected String shiftBackOutput; | ||
|
||
public static final String SHIFT_OFFSET_LIST = "shift-offset-list"; | ||
@Argument(fullName = SHIFT_OFFSET_LIST, | ||
doc="Number of bases to skip in the reference before starting the shifted reference. For example, if 300 is specified, the new fasta will start at the 301th base (count starting at 1)." + | ||
"If not specified, the contig will be shifted by half the number of bases. To skip the shifting of a contig, specify 0 in the list.", optional = true) | ||
private List<Integer> shiftOffsets = null; | ||
|
||
public static final String INTERAL_FILE_NAME = "interval-file-name"; | ||
@Argument(fullName = INTERAL_FILE_NAME, | ||
doc="Base name for interval files. Intervals will be midway between beginning and computed offset. If not specified or if custom offsets are specified, no interval files will be written.", optional = true) | ||
private String intervalFilename; | ||
|
||
public static final String LINE_WIDTH_LONG_NAME = "line-width"; | ||
@Argument(fullName= LINE_WIDTH_LONG_NAME, doc="Maximum length of sequence to write per line", optional=true) | ||
public int basesPerLine = FastaReferenceWriter.DEFAULT_BASES_PER_LINE; | ||
|
||
ReferenceDataSource refSource; | ||
FastaReferenceWriter refWriter; | ||
FileWriter chainFileWriter; | ||
FileWriter intervalRegularWriter; | ||
FileWriter intervalShiftedWriter; | ||
|
||
int chainId = 0; | ||
|
||
@Override | ||
public boolean requiresReference() { | ||
return true; | ||
} | ||
|
||
@Override | ||
public void onTraversalStart() { | ||
refSource = referenceArguments.getReferencePath() != null ? ReferenceDataSource.of(referenceArguments.getReferencePath()) : null; | ||
final Path path = IOUtils.getPath(output); | ||
chainId = 1; | ||
try { | ||
refWriter = new FastaReferenceWriterBuilder() | ||
.setFastaFile(path) | ||
.setBasesPerLine(basesPerLine) | ||
.build(); | ||
chainFileWriter = new FileWriter(shiftBackOutput); | ||
if (intervalFilename != null) { | ||
intervalRegularWriter = new FileWriter(intervalFilename+ ".intervals"); | ||
intervalShiftedWriter = new FileWriter(intervalFilename + ".shifted.intervals"); | ||
} | ||
} catch (IOException e) { | ||
throw new UserException.CouldNotCreateOutputFile("Couldn't create " + output + ", encountered exception: " + e.getMessage(), e); | ||
} | ||
} | ||
|
||
public void traverse() { | ||
SAMSequenceDictionary refDict = refSource.getSequenceDictionary(); | ||
long refLengthLong = refDict.getReferenceLength(); | ||
if (refLengthLong > Integer.MAX_VALUE) { | ||
// TODO fix this?? | ||
throw new UserException.BadInput("Reference length is too long"); | ||
} | ||
List<SAMSequenceRecord> contigs = refSource.getSequenceDictionary().getSequences(); | ||
final ListIterator<Integer> shiftOffsetsIt = (shiftOffsets != null && shiftOffsets.size() == contigs.size()) ? | ||
shiftOffsets.listIterator() : null; | ||
refSource.getSequenceDictionary().getSequences().forEach(seq -> shiftContig(seq, shiftOffsetsIt)); | ||
} | ||
|
||
protected void shiftContig(SAMSequenceRecord seq, ListIterator<Integer> shiftOffsetsIt) { | ||
int contigLength = seq.getSequenceLength(); | ||
int shiftOffset = shiftOffsetsIt == null ? contigLength/2 : shiftOffsetsIt.next(); | ||
if (shiftOffset != 0) { | ||
byte[] bases = refSource.queryAndPrefetch(new SimpleInterval(seq.getSequenceName(), 1, contigLength)).getBases(); | ||
byte[] basesAtEnd = Arrays.copyOfRange(bases, shiftOffset, bases.length); | ||
byte[] basesAtStart = Arrays.copyOf(bases, shiftOffset); | ||
int shiftBackOffset = bases.length - shiftOffset; | ||
|
||
try { | ||
refWriter.startSequence(seq.getSequenceName(), basesPerLine); | ||
refWriter.appendBases(basesAtEnd).appendBases(basesAtStart); | ||
chainFileWriter.append(createChainString(seq.getSequenceName(), shiftBackOffset, contigLength, shiftOffset, bases.length, 0, shiftBackOffset, chainId++)); | ||
chainFileWriter.append("\n" + shiftBackOffset + "\n\n"); | ||
chainFileWriter.append(createChainString(seq.getSequenceName(), shiftOffset - 1, contigLength, 0, shiftOffset, shiftBackOffset, bases.length, chainId++)); | ||
chainFileWriter.append("\n" + shiftOffset + "\n\n"); | ||
if (intervalFilename != null && shiftOffsetsIt == null) { | ||
int intervalStart = shiftOffset/2; | ||
int intervalEnd = intervalStart + contigLength/2 - 1; | ||
int shiftedIntervalStart = intervalStart; | ||
int shiftedIntervalEnd = intervalEnd + contigLength % 2; | ||
intervalRegularWriter.append(seq.getSequenceName() + ":" + intervalStart + "-" + intervalEnd + "\n"); | ||
intervalShiftedWriter.append(seq.getSequenceName() + ":" + shiftedIntervalStart + "-" + shiftedIntervalEnd + "\n"); | ||
} | ||
} catch (IOException e) { | ||
throw new UserException("Failed to write fasta due to " + e.getMessage(), e); | ||
} | ||
} | ||
} | ||
|
||
private String createChainString(String name, int score, int length, int start, int end, int shiftBackStart, int shiftBackEnd, int id) { | ||
String[] items = new String[] { "chain", | ||
Integer.toString(score), | ||
name, | ||
Integer.toString(length), | ||
"+", | ||
Integer.toString(shiftBackStart), | ||
Integer.toString(shiftBackEnd), | ||
name, | ||
Integer.toString(length), | ||
"+", | ||
Integer.toString(start), | ||
Integer.toString(end), | ||
Integer.toString(id) | ||
}; | ||
return String.join("\t", items); | ||
} | ||
|
||
@Override | ||
public Object onTraversalSuccess(){ | ||
// TODO is this right? | ||
return null; | ||
} | ||
|
||
@Override | ||
public void closeTool() { | ||
super.closeTool(); | ||
try{ | ||
if( refWriter != null ) { | ||
refWriter.close(); | ||
} | ||
} catch (IllegalStateException e){ | ||
//sink this | ||
} catch (IOException e) { | ||
throw new UserException("Failed to write fasta due to " + e.getMessage(), e); | ||
} | ||
try{ | ||
if (chainFileWriter != null) { | ||
chainFileWriter.close(); | ||
} | ||
} catch (IllegalStateException e){ | ||
//sink this | ||
} catch (IOException e) { | ||
throw new UserException("Failed to write chain file due to " + e.getMessage(), e); | ||
} | ||
try{ | ||
if (intervalRegularWriter != null) { | ||
intervalRegularWriter.close(); | ||
} | ||
if (intervalShiftedWriter != null) { | ||
intervalShiftedWriter.close(); | ||
} | ||
} catch (IllegalStateException e){ | ||
//sink this | ||
} catch (IOException e) { | ||
throw new UserException("Failed to write intervals due to " + e.getMessage(), e); | ||
} | ||
} | ||
|
||
} |