Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for breakend replacement alleles in SVCluster #8408

Merged
merged 2 commits into from
Jul 12, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -175,12 +175,18 @@ public SVCallRecord collapse(final SVClusterEngine.OutputCluster cluster) {
final List<String> algorithms = collapseAlgorithms(items);
final Map<String, Object> attributes = collapseAttributes(representative, items);

final List<Allele> altAlleles = collapseAltAlleles(items);
final Boolean strandA = type == GATKSVVCFConstants.StructuralVariantAnnotationType.CNV ? null : representative.getStrandA();
final Boolean strandB = type == GATKSVVCFConstants.StructuralVariantAnnotationType.CNV ? null : representative.getStrandB();

final Allele refAllele = collapseRefAlleles(representative.getContigA(), start);
final List<Allele> altAlleles;
if (type == GATKSVVCFConstants.StructuralVariantAnnotationType.BND) {
altAlleles = Collections.singletonList(constructBndAllele(strandA, representative.getStrandB(), representative.getContigB(), end, refAllele));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
altAlleles = Collections.singletonList(constructBndAllele(strandA, representative.getStrandB(), representative.getContigB(), end, refAllele));
altAlleles = Collections.singletonList(constructBndAllele(strandA, strandB, representative.getContigB(), end, refAllele));

} else {
altAlleles = collapseAltAlleles(items);
}
final List<Allele> alleles = collapseAlleles(altAlleles, refAllele);
final List<Genotype> genotypes = harmonizeAltAlleles(altAlleles, collapseAllGenotypes(items, refAllele));
final Boolean strandA = type == GATKSVVCFConstants.StructuralVariantAnnotationType.CNV ? null : representative.getStrandA();
final Boolean strandB = type == GATKSVVCFConstants.StructuralVariantAnnotationType.CNV ? null : representative.getStrandB();

final Set<String> filters = collapseFilters(items);
final Double quality = collapseQuality(items);
Expand Down Expand Up @@ -257,8 +263,22 @@ protected Allele collapseRefAlleles(final String contig, final int pos) {
return Allele.create(bases[0], true);
}

protected static Allele constructBndAllele(final Boolean strandA, final Boolean strandB, final String contigB,
final int posB, final Allele refAllele) {
Utils.validateArg(strandA != null, "First breakend strand cannot be null");
Utils.validateArg(strandB != null, "Second breakend strand cannot be null");
final String bracket = strandB ? "]" : "[";
final String str;
if (strandA) {
str = refAllele.getBaseString() + bracket + contigB + ":" + posB + bracket;
} else {
str = bracket + contigB + ":" + posB + bracket + refAllele.getBaseString();
}
return Allele.create(str, false);
}

/**
* Collapses alternate alleles into a list of representative alleles. Note this supports sub-typed alleles such as
* Collapses symbolic alleles into a list of representative alleles. Note this supports sub-typed alleles such as
* &lt;INS:MEI&gt;. If multiple alt alleles are found, the variant must either be a CNV or sub-typed alleles with the
* same base symbol (e.g. &lt;INS:MEI&gt; and &lt;INS:MEI:SVA&gt; would result in &lt;INS&gt;).
* @param items records whose alt alleles should be collapsed
Expand All @@ -276,6 +296,11 @@ protected List<Allele> collapseAltAlleles(final Collection<SVCallRecord> items)
} else if (altAlleles.size() == 1) {
return Collections.singletonList(altAlleles.get(0));
} else {
for (final Allele a : altAlleles) {
if (!validateAltAllele(a)) {
throw new IllegalArgumentException("Cannot collapse non-symbolic allele: " + a.getDisplayString());
}
}
// Multiple non-ref alleles need collapsing
// TODO does not search for subtypes e.g. <DUP:TANDEM>
int numCnvAlleles = 0;
Expand All @@ -285,8 +310,7 @@ protected List<Allele> collapseAltAlleles(final Collection<SVCallRecord> items)
if (a.equals(Allele.SV_SIMPLE_CNV)) {
numCnvAlleles++;
numMultiallelicAlleles++;
}
if (a.equals(Allele.SV_SIMPLE_DUP) || a.equals(Allele.SV_SIMPLE_DEL)) {
} else if (a.equals(Allele.SV_SIMPLE_DUP) || a.equals(Allele.SV_SIMPLE_DEL)) {
numCnvAlleles++;
}
}
Expand All @@ -307,11 +331,14 @@ protected List<Allele> collapseAltAlleles(final Collection<SVCallRecord> items)
} else {
throw new UnsupportedOperationException("Unimplemented alt allele summary strategy: " + altAlleleSummaryStrategy.name());
}
Utils.validate(collapsedAlleleTokens.length > 0, "Encountered multiple symbolic allele base symbols for non-CNV");
return Collections.singletonList(Allele.create("<" + String.join(":", collapsedAlleleTokens) + ">", false));
}
}

protected boolean validateAltAllele(final Allele allele) {
return !allele.isReference() && allele.isSymbolic() && !allele.isBreakpoint() && !allele.isSingleBreakend();
}

private String[] collapseAltAllelesCommon(final List<Allele> alleles) {
final List<String[]> alleleTokens = alleles.stream()
.map(GATKSVVariantContextUtils::getSymbolicAlleleSymbols)
Expand Down Expand Up @@ -646,23 +673,22 @@ private SVCallRecord getRepresentativeIntervalItem(final Collection<SVCallRecord
}
// Favor more common variant with most similar distance
final Comparator<SVCallRecord> carrierCountComparator = Comparator.comparing(r -> -r.getCarrierGenotypeList().size());
final Comparator<SVCallRecord> distanceComparator = Comparator.comparing(r -> getDistance(r, starts, ends));
final Comparator<SVCallRecord> idComparator = Comparator.comparing(r -> getDistance(r, starts, ends)); // stabilizes order
final Comparator<SVCallRecord> distanceComparator = Comparator.comparing(r -> getDistance(r.getPositionA(), r.getPositionB(), starts, ends));
final Comparator<SVCallRecord> idComparator = Comparator.comparing(r -> getDistance(r.getPositionA(), r.getPositionB(), starts, ends)); // stabilizes order
return records.stream().min(
carrierCountComparator
.thenComparing(distanceComparator)
.thenComparing(idComparator)).get();
}

private long getDistance(final SVCallRecord record,
final int[] starts,
final int[] ends) {
protected static long getDistance(final int posA,
final int posB,
final int[] starts,
final int[] ends) {
long d = 0;
final int posA = record.getPositionA();
for (int j = 0; j < starts.length; j++) {
d += FastMath.abs(starts[j] - posA);
}
final int posB = record.getPositionB();
for (int j = 0; j < ends.length; j++) {
d += FastMath.abs(ends[j] - posB);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -411,14 +411,9 @@ private void write(final boolean force) {
}

private VCFHeader createHeader() {
final VCFHeader header = new VCFHeader(getDefaultToolVCFHeaderLines(), samples);
header.setVCFHeaderVersion(VCFHeaderVersion.VCF4_2);
final VCFHeader header = new VCFHeader(getHeaderForVariants().getMetaDataInInputOrder(), samples);
header.setSequenceDictionary(dictionary);

// Copy from inputs
getHeaderForVariants().getFormatHeaderLines().forEach(header::addMetaDataLine);
getHeaderForVariants().getInfoHeaderLines().forEach(header::addMetaDataLine);

// Required info lines
header.addMetaDataLine(VCFStandardHeaderLines.getInfoLine(VCFConstants.END_KEY));
header.addMetaDataLine(GATKSVVCFHeaderLines.getInfoLine(GATKSVVCFConstants.SVLEN));
Expand Down
Loading