Skip to content

Commit

Permalink
Added logging statements to b37 conversion process. (#7760)
Browse files Browse the repository at this point in the history
* Added logging statements to b37 conversion process.

Fixes #7757
  • Loading branch information
jonn-smith committed Apr 8, 2022
1 parent 2b0a558 commit 8318e69
Showing 1 changed file with 44 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1700,31 +1700,71 @@ public static boolean isSequenceDictionaryUsingB37Reference(final SAMSequenceDic
// Check to make sure all our sequences are accounted for in the given dictionary.

if ( sequenceDictionary == null ) {
logger.warn("No sequence dictionary provided in the input VCF file. Cannot check against B37.");
return false;
}

if ( B37_SEQUENCE_DICTIONARY == null ) {
B37_SEQUENCE_DICTIONARY = initializeB37SequenceDict();
}

// Track the missing / wrong data here for better logging:
final List<SAMSequenceRecord> missingSequenceRecords = new ArrayList<>();
final Map<SAMSequenceRecord, List<Integer>> incompatibleSequenceLengths = new HashMap<>();
final Map<SAMSequenceRecord, List<String>> incompatibleSequenceMd5Sums = new HashMap<>();

boolean isB37 = true;
for ( final SAMSequenceRecord b37SequenceRecord : B37_SEQUENCE_DICTIONARY.getSequences() ) {
// Now we check the Name, Length, and MD5Sum (if present) of all records:

final SAMSequenceRecord inputSequenceRecord = sequenceDictionary.getSequence(b37SequenceRecord.getSequenceName());
if ( inputSequenceRecord == null ) {
return false;
missingSequenceRecords.add(b37SequenceRecord);
isB37 = false;
continue;
}

if ( inputSequenceRecord.getSequenceLength() != b37SequenceRecord.getSequenceLength() ) {
return false;
incompatibleSequenceLengths.put(inputSequenceRecord,
Arrays.asList(inputSequenceRecord.getSequenceLength(), b37SequenceRecord.getSequenceLength()));
isB37 = false;
continue;
}

if ( (inputSequenceRecord.getMd5() != null) && (!inputSequenceRecord.getMd5().equals(b37SequenceRecord.getMd5())) ) {
return false;
incompatibleSequenceMd5Sums.put(inputSequenceRecord,
Arrays.asList(inputSequenceRecord.getMd5(), b37SequenceRecord.getMd5()));
isB37 = false;
}
}

if (!isB37) {
logger.info("Input VCF has been determined to not based on b37:");
if (missingSequenceRecords.size() > 0) {
logger.info(" The following contigs are present in b37 and missing in the input VCF sequence dictionary:");
for (final SAMSequenceRecord record : missingSequenceRecords) {
logger.info(" " + record.getSequenceName() + " (len=" + record.getSequenceLength() + ",assembly=" + record.getAssembly() + ")");
}
}

if (incompatibleSequenceLengths.size() > 0) {
logger.info(" The following contigs are present in both b37 and the input VCF sequence dictionary, but have conflicting length information:");
for (final Map.Entry<SAMSequenceRecord, List<Integer>> e : incompatibleSequenceLengths.entrySet()) {
final SAMSequenceRecord record = e.getKey();
logger.info(" " + record.getSequenceName() + " (len=" + record.getSequenceLength() + ",assembly=" + record.getAssembly() + "):" + " VCF Length: " + e.getValue().get(0).toString() + ", b37 Length: " + e.getValue().get(1).toString());
}
}

if (incompatibleSequenceMd5Sums.size() > 0) {
logger.info(" The following contigs are present in both b37 and the input VCF sequence dictionary, but have conflicting md5sum:");
for (final Map.Entry<SAMSequenceRecord, List<String>> e : incompatibleSequenceMd5Sums.entrySet()) {
final SAMSequenceRecord record = e.getKey();
logger.info(" " + record.getSequenceName() + " (len=" + record.getSequenceLength() + ",assembly=" + record.getAssembly() + "):" + " VCF md5sum: " + e.getValue().get(0) + ", b37 md5sum: " + e.getValue().get(1));
}
}
}

return true;
return isB37;
}

/**
Expand Down

0 comments on commit 8318e69

Please sign in to comment.