@@ -22,7 +22,6 @@ public class CRAMEncoderV3_1 extends CRAMEncoder {
*/
public CRAMEncoderV3_1(final Bundle outputBundle, final ReadsEncoderOptions readsEncoderOptions) {
super(outputBundle, readsEncoderOptions);
throw new CRAMException("CRAM v3.1 encoding is not yet supported");
}

@Override
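Note: with the "not yet supported" exception removed from the CRAMEncoderV3_1 constructor, a CRAM 3.1 encoder can be obtained through the plugin registry in the usual way. A minimal sketch, mirroring the round-trip test added later in this diff; the output path and the omitted header/record handling are placeholders, not part of the change:

import htsjdk.beta.plugin.reads.ReadsEncoder;
import htsjdk.beta.plugin.reads.ReadsEncoderOptions;
import htsjdk.beta.plugin.registry.HtsDefaultRegistry;
import htsjdk.io.HtsPath;
import htsjdk.io.IOPath;
import java.io.IOException;

public class CRAM31EncoderSketch {
    public static void main(final String[] args) throws IOException {
        final IOPath outputPath = new HtsPath("output.cram"); // placeholder path
        final ReadsEncoderOptions encoderOptions = new ReadsEncoderOptions().setPreSorted(true);
        // with DEFAULT_CRAM_VERSION now set to CRAM v3.1 (see CramVersions below),
        // this resolves to the v3.1 encoder
        try (final ReadsEncoder encoder =
                HtsDefaultRegistry.getReadsResolver().getReadsEncoder(outputPath, encoderOptions)) {
            // set a SAMFileHeader and write SAMRecords here, as in the round-trip test below
        }
    }
}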
31 changes: 0 additions & 31 deletions src/main/java/htsjdk/beta/plugin/registry/ReadsResolver.java
@@ -1,6 +1,5 @@
package htsjdk.beta.plugin.registry;

import htsjdk.beta.codecs.reads.cram.cramV3_1.CRAMCodecV3_1;
import htsjdk.beta.exception.HtsjdkException;
import htsjdk.beta.exception.HtsjdkPluginException;
import htsjdk.beta.plugin.HtsVersion;
@@ -12,13 +11,9 @@
import htsjdk.beta.plugin.reads.ReadsDecoderOptions;
import htsjdk.beta.plugin.reads.ReadsEncoder;
import htsjdk.beta.plugin.reads.ReadsEncoderOptions;
import htsjdk.beta.plugin.reads.ReadsFormats;
import htsjdk.io.IOPath;
import htsjdk.utils.ValidationUtils;

import java.util.List;
import java.util.stream.Collectors;

/**
* Class with methods for resolving inputs and outputs to reads encoders and decoders.
* <p>
@@ -209,30 +204,4 @@ public ReadsEncoder getReadsEncoder(
.getEncoder(outputBundle, readsEncoderOptions);
}

/**
* Temporarily override to remove the CRAM 3.1 codec from the list of candidate codecs when the request is for
* the newest version, since it has no write implementation yet.
*/
@Override
protected List<ReadsCodec> filterByVersion(final List<ReadsCodec> candidateCodecs, final HtsVersion htsVersion) {
final List<ReadsCodec> preFilteredCodecs;
if (htsVersion.equals(HtsVersion.NEWEST_VERSION)) {
// if the request is for the newest version, then pre-filter out the CRAM 3.1 codec since it has no
// write implementation yet, and then delegate to the superclass to let it find the newest version among
// the remaining codecs
preFilteredCodecs = candidateCodecs.stream().filter(
c -> !(c.getFileFormat().equals(ReadsFormats.CRAM)
&& c.getVersion().equals(CRAMCodecV3_1.VERSION_3_1)))
.collect(Collectors.toList());
final HtsVersion newestVersion = preFilteredCodecs.stream()
.map(c -> c.getVersion())
.reduce(candidateCodecs.get(0).getVersion(),
(HtsVersion a, HtsVersion b) -> a.compareTo(b) > 0 ? a : b);
return candidateCodecs.stream().filter(
c -> c.getVersion().equals(newestVersion)).collect(Collectors.toList());
} else {
preFilteredCodecs = candidateCodecs;
}
return super.filterByVersion(preFilteredCodecs, htsVersion);
}
}
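Note: the deleted filterByVersion override existed only to hide the CRAM 3.1 codec from NEWEST_VERSION resolution while it had no write path; with encoding implemented, the base-class resolution suffices. The sketch below illustrates the "keep only the codecs with the newest version" selection the override was duplicating; the real base-class implementation may differ in detail.

import htsjdk.beta.plugin.HtsVersion;
import htsjdk.beta.plugin.reads.ReadsCodec;
import java.util.List;
import java.util.stream.Collectors;

final class NewestVersionSketch {
    // reduce to the newest HtsVersion among the candidates, then keep only codecs at that version
    static List<ReadsCodec> newestOnly(final List<ReadsCodec> candidateCodecs) {
        final HtsVersion newestVersion = candidateCodecs.stream()
                .map(ReadsCodec::getVersion)
                .reduce(candidateCodecs.get(0).getVersion(),
                        (a, b) -> a.compareTo(b) > 0 ? a : b);
        return candidateCodecs.stream()
                .filter(c -> c.getVersion().equals(newestVersion))
                .collect(Collectors.toList());
    }
}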
@@ -17,7 +17,7 @@ public final class CramVersions {
/**
* The default CRAM version when creating a new CRAM output file or stream.
*/
public static final CRAMVersion DEFAULT_CRAM_VERSION = CRAM_v3;
public static final CRAMVersion DEFAULT_CRAM_VERSION = CRAM_v3_1;

/**
* Return true if {@code candidateVersion} is a supported CRAM version.
@@ -156,7 +156,7 @@ public static ByteBuffer allocateOutputBuffer(final int inSize) {
final int compressedSize = (int) (inSize + 257 * 257 * 3 + 9);
final ByteBuffer outputBuffer = allocateByteBuffer(compressedSize);
if (outputBuffer.remaining() < compressedSize) {
throw new CRAMException("Failed to allocate sufficient buffer size for RANS coder.");
throw new CRAMException("Failed to allocate sufficient buffer size for CRAM codec.");
}
return outputBuffer;
}
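Note: the surrounding sizing formula reserves worst-case headroom on top of the input size. A small worked example; the reading of the constants (257*257 order-1 frequency-table entries at up to 3 bytes each, plus a small fixed header) is an assumption for illustration, not taken from the source.

public final class BufferSizingSketch {
    public static void main(final String[] args) {
        final int inSize = 1 << 20;                           // example: 1 MiB of input
        final int frequencyTableOverhead = 257 * 257 * 3;     // 198,147 bytes
        final int compressedSize = inSize + frequencyTableOverhead + 9;
        System.out.println(compressedSize);                   // 1,246,732
    }
}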
@@ -342,15 +342,20 @@ private static ByteBuffer tryCompress(final ByteBuffer nameTokenStream, final bo
ByteBuffer compressedByteBuffer = null;

if (useArith == true) { // use the range encoder
// this code path is never executed by the default write profile, since we don't turn on the
// range coder, but it is used by the test suite
final int[] rangeEncoderFlagsSets = {
0,
RangeParams.ORDER_FLAG_MASK,
// based on a few observations (using ransNx16, not range), RLE, PACK, and ORDER/PACK seem to
// yield the best results; this to be validated for the range encoder but for now use the same
// flags as for ransNx16
0, // no flags, just use arith
RangeParams.RLE_FLAG_MASK, //64
RangeParams.RLE_FLAG_MASK | RangeParams.ORDER_FLAG_MASK, //65
RangeParams.PACK_FLAG_MASK, //128,
RangeParams.PACK_FLAG_MASK | RangeParams.ORDER_FLAG_MASK, //129
//RangeParams.RLE_FLAG_MASK | RangeParams.ORDER_FLAG_MASK, //65
//RangeParams.ORDER_FLAG_MASK,
// we don't include stripe here since it's not implemented for write
RangeParams.PACK_FLAG_MASK | RangeParams.RLE_FLAG_MASK | RangeParams.ORDER_FLAG_MASK // 193+8
//RangeParams.PACK_FLAG_MASK | RangeParams.RLE_FLAG_MASK | RangeParams.ORDER_FLAG_MASK // 193+8
};
for (int rangeEncoderFlagSet : rangeEncoderFlagsSets) {
if ((rangeEncoderFlagSet & RangeParams.ORDER_FLAG_MASK) != 0 && nameTokenStream.remaining() < 100) {
@@ -368,16 +373,24 @@ private static ByteBuffer tryCompress(final ByteBuffer nameTokenStream, final bo
compressedByteBuffer = tmpByteBuffer;
}
}
if (bestCompressedLength > nameTokenStream.limit()) {
// compression doesn't buy us anything; just use CAT
final RangeEncode rangeEncode = new RangeEncode();
nameTokenStream.rewind();
compressedByteBuffer = rangeEncode.compress(nameTokenStream, new RangeParams(RANSNx16Params.CAT_FLAG_MASK));
}
} else {
final int[] ransNx16FlagsSets = {
0,
RANSNx16Params.ORDER_FLAG_MASK,
0, // no flags, just use RANSNx16
RANSNx16Params.RLE_FLAG_MASK, //64
RANSNx16Params.RLE_FLAG_MASK | RANSNx16Params.ORDER_FLAG_MASK, //65
RANSNx16Params.PACK_FLAG_MASK, //128,
RANSNx16Params.PACK_FLAG_MASK | RANSNx16Params.ORDER_FLAG_MASK, //129
// based on a few observations using ransNx16, RLE, PACK, and ORDER/PACK seem to yield the
// best results; this needs more validation but for now don't try the remaining combinations
//RANSNx16Params.ORDER_FLAG_MASK,
//RANSNx16Params.RLE_FLAG_MASK | RANSNx16Params.ORDER_FLAG_MASK, //65
// we don't include stripe here since it's not implemented for write
RANSNx16Params.PACK_FLAG_MASK | RANSNx16Params.RLE_FLAG_MASK | RANSNx16Params.ORDER_FLAG_MASK // 193+8
//RANSNx16Params.PACK_FLAG_MASK | RANSNx16Params.RLE_FLAG_MASK | RANSNx16Params.ORDER_FLAG_MASK // 193+8
};
for (int ransNx16FlagSet : ransNx16FlagsSets) {
if ((ransNx16FlagSet & RANSNx16Params.ORDER_FLAG_MASK) != 0 && nameTokenStream.remaining() < 100) {
@@ -395,6 +408,12 @@ private static ByteBuffer tryCompress(final ByteBuffer nameTokenStream, final bo
compressedByteBuffer = tmpByteBuffer;
}
}
if (bestCompressedLength > nameTokenStream.limit()) {
// compression doesn't buy us anything; just use CAT
final RANSNx16Encode ransEncode = new RANSNx16Encode();
nameTokenStream.rewind();
compressedByteBuffer = ransEncode.compress(nameTokenStream, new RANSNx16Params(RANSNx16Params.CAT_FLAG_MASK));
}
}
return compressedByteBuffer;
}
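Note: the strategy in tryCompress is the same for both code paths: try a small set of flag combinations, keep the smallest output, and fall back to CAT (uncompressed pass-through) when nothing beats the raw token stream. A condensed sketch of that pattern, assuming only the RANSNx16Params API that already appears in this diff and that RANSNx16Encode lives in the same package:

import htsjdk.samtools.cram.compression.rans.ransnx16.RANSNx16Encode;
import htsjdk.samtools.cram.compression.rans.ransnx16.RANSNx16Params;
import java.nio.ByteBuffer;

final class NameTokenCompressionSketch {
    static ByteBuffer compressSmallest(final ByteBuffer nameTokenStream, final int[] flagSets) {
        final RANSNx16Encode encoder = new RANSNx16Encode();
        ByteBuffer best = null;
        for (final int flags : flagSets) {
            // try each candidate flag set and keep the smallest result
            nameTokenStream.rewind();
            final ByteBuffer candidate = encoder.compress(nameTokenStream, new RANSNx16Params(flags));
            if (best == null || candidate.limit() < best.limit()) {
                best = candidate;
            }
        }
        if (best == null || best.limit() > nameTokenStream.limit()) {
            // compression doesn't buy us anything; just use CAT
            nameTokenStream.rewind();
            best = encoder.compress(nameTokenStream, new RANSNx16Params(RANSNx16Params.CAT_FLAG_MASK));
        }
        return best;
    }
}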
@@ -26,7 +26,8 @@

import htsjdk.samtools.cram.CRAMException;
import htsjdk.samtools.cram.compression.ExternalCompressor;
import htsjdk.samtools.cram.compression.rans.rans4x8.RANS4x8Params;
import htsjdk.samtools.cram.compression.nametokenisation.NameTokenisationDecode;
import htsjdk.samtools.cram.compression.rans.ransnx16.RANSNx16Params;
import htsjdk.samtools.cram.encoding.CRAMEncoding;
import htsjdk.samtools.cram.encoding.external.ByteArrayStopEncoding;
import htsjdk.samtools.cram.encoding.external.ExternalByteEncoding;
@@ -140,7 +141,7 @@ public CompressionHeaderEncodingMap(final CRAMEncodingStrategy encodingStrategy)
putExternalRansOrderOneEncoding(DataSeries.RG_ReadGroup);
putExternalRansOrderZeroEncoding(DataSeries.RI_RefId);
putExternalRansOrderOneEncoding(DataSeries.RL_ReadLength);
putExternalByteArrayStopTabGzipEncoding(encodingStrategy, DataSeries.RN_ReadName);
putByteArrayStopNameTokEncoding(encodingStrategy, DataSeries.RN_ReadName);
putExternalGzipEncoding(encodingStrategy, DataSeries.RS_RefSkip);
putExternalByteArrayStopTabGzipEncoding(encodingStrategy, DataSeries.SC_SoftClip);
// the TC data series is obsolete
@@ -287,13 +288,13 @@ public ExternalCompressor getBestExternalCompressor(final byte[] data, final CRA
final int gzipLen = gzip.compress(data, null).length;

final ExternalCompressor rans0 = compressorCache.getCompressorForMethod(
BlockCompressionMethod.RANS,
RANS4x8Params.ORDER.ZERO.ordinal());
BlockCompressionMethod.RANSNx16,
RANSNx16Params.ORDER.ZERO.ordinal());
final int rans0Len = rans0.compress(data,null).length;

final ExternalCompressor rans1 = compressorCache.getCompressorForMethod(
BlockCompressionMethod.RANS,
RANS4x8Params.ORDER.ONE.ordinal());
BlockCompressionMethod.RANSNx16,
RANSNx16Params.ORDER.ONE.ordinal());
final int rans1Len = rans1.compress(data, null).length;

// find the best of general purpose codecs:
@@ -386,6 +387,15 @@ private void putExternalByteArrayStopTabGzipEncoding(final CRAMEncodingStrategy
compressorCache.getCompressorForMethod(BlockCompressionMethod.GZIP, encodingStrategy.getGZIPCompressionLevel()));
}

private void putByteArrayStopNameTokEncoding(final CRAMEncodingStrategy encodingStrategy, final DataSeries dataSeries) {
// ByteArrayStopEncoding is paired with name tokenisation since using it with the
// NameTokenisationDecode.NAME_SEPARATOR conveniently writes the read name data in the NAME_SEPARATOR
// delimited/terminated format that is expected by the downstream tokenisation compressor code
putExternalEncoding(dataSeries,
new ByteArrayStopEncoding(NameTokenisationDecode.NAME_SEPARATOR, dataSeries.getExternalBlockContentId()).toEncodingDescriptor(),
compressorCache.getCompressorForMethod(BlockCompressionMethod.NAME_TOKENISER, 0));
}
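Note: the comment above describes a framing contract: each read name is written followed by NameTokenisationDecode.NAME_SEPARATOR, producing the delimited stream the name-tokenisation compressor expects to split apart. A purely illustrative sketch of that framing; the helper name is hypothetical and the separator is passed in rather than hard-coding the htsjdk constant's value:

import java.io.ByteArrayOutputStream;
import java.nio.charset.StandardCharsets;
import java.util.List;

final class NameFramingSketch {
    // concatenate read names, terminating each with the separator byte,
    // as ByteArrayStopEncoding does for the RN data series above
    static byte[] frameNames(final List<String> readNames, final byte separator) {
        final ByteArrayOutputStream out = new ByteArrayOutputStream();
        for (final String name : readNames) {
            final byte[] bytes = name.getBytes(StandardCharsets.UTF_8);
            out.write(bytes, 0, bytes.length);
            out.write(separator);
        }
        return out.toByteArray();
    }
}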

// add an external encoding appropriate for the dataSeries value type, with a GZIP compressor
private void putExternalGzipEncoding(final CRAMEncodingStrategy encodingStrategy, final DataSeries dataSeries) {
putExternalEncoding(
@@ -397,14 +407,14 @@ private void putExternalGzipEncoding(final CRAMEncodingStrategy encodingStrategy
private void putExternalRansOrderOneEncoding(final DataSeries dataSeries) {
putExternalEncoding(
dataSeries,
compressorCache.getCompressorForMethod(BlockCompressionMethod.RANS, RANS4x8Params.ORDER.ONE.ordinal()));
compressorCache.getCompressorForMethod(BlockCompressionMethod.RANSNx16, RANSNx16Params.ORDER.ONE.ordinal()));
}

// add an external encoding appropriate for the dataSeries value type, with a RANS order 0 compressor
private void putExternalRansOrderZeroEncoding(final DataSeries dataSeries) {
putExternalEncoding(
dataSeries,
compressorCache.getCompressorForMethod(BlockCompressionMethod.RANS, RANS4x8Params.ORDER.ZERO.ordinal()));
compressorCache.getCompressorForMethod(BlockCompressionMethod.RANSNx16, RANSNx16Params.ORDER.ZERO.ordinal()));
}

@Override
@@ -79,8 +79,8 @@ public ExternalCompressor getCompressorForMethod(
return getCachedCompressorForMethod(compressionMethod, compressorSpecificArg);

case RANS: {
// for efficiency, we want to share the same underlying RANS object with both order-0 and
// order-1 ExternalCompressors
// in previous implementations, we would cache separate order-0 and order-1 compressors for performance
// reasons; we no longer NEED to do so but retain this structure for now
final int ransArg = compressorSpecificArg == ExternalCompressor.NO_COMPRESSION_ARG ?
RANS4x8Params.ORDER.ZERO.ordinal() :
compressorSpecificArg;
@@ -103,8 +103,8 @@
}

case RANSNx16: {
// for efficiency, we want to share the same underlying RANSNx16 object with both order-0 and
// order-1 ExternalCompressors
// in previous implementations, we would cache separate order-0 and order-1 compressors for performance
// reasons; we no longer NEED to do so but retain this structure for now
final int ransArg = compressorSpecificArg == ExternalCompressor.NO_COMPRESSION_ARG ?
RANSNx16Params.ORDER.ZERO.ordinal() :
compressorSpecificArg;
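Note: the updated comments say the order-0 and order-1 compressors no longer need to share one underlying codec object, but the per-(method, argument) cache is kept. A sketch of that caching pattern; the key format and field names are assumptions, not the real CompressorCache layout.

import java.util.HashMap;
import java.util.Map;
import java.util.function.IntFunction;

final class CompressorCacheSketch {
    private final Map<String, Object> cache = new HashMap<>();

    // one cached compressor instance per (method, argument) pair,
    // e.g. RANSNx16 order-0 vs RANSNx16 order-1
    Object getCompressorForMethod(final String method, final int compressorSpecificArg,
                                  final IntFunction<Object> factory) {
        return cache.computeIfAbsent(method + ":" + compressorSpecificArg,
                k -> factory.apply(compressorSpecificArg));
    }
}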
@@ -2,18 +2,28 @@

import htsjdk.HtsjdkTest;
import htsjdk.beta.codecs.reads.cram.cramV3_1.CRAMCodecV3_1;
import htsjdk.beta.plugin.IOUtils;
import htsjdk.beta.plugin.reads.ReadsDecoderOptions;
import htsjdk.beta.plugin.reads.ReadsEncoderOptions;
import htsjdk.beta.plugin.reads.ReadsFormats;
import htsjdk.beta.plugin.registry.HtsDefaultRegistry;
import htsjdk.io.HtsPath;
import htsjdk.io.IOPath;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.cram.CRAM31Tests;
import htsjdk.samtools.cram.common.CramVersions;
import htsjdk.samtools.util.CloseableIterator;
import htsjdk.samtools.util.FileExtensions;
import htsjdk.utils.SamtoolsTestUtils;
import org.testng.Assert;
import org.testng.annotations.Test;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

public class HtsCRAMCodec31Test extends HtsjdkTest {
final IOPath TEST_DIR = new HtsPath("src/test/resources/htsjdk/samtools/");

@@ -49,4 +59,59 @@ public void testCRAMDecoder() {
}
}
}

@Test
public void testRoundTripCRAM31() throws IOException {
final IOPath sourceCRAMPath = new HtsPath(TEST_DIR + "cram/CEUTrio.HiSeq.WGS.b37.NA12878.20.21.v3.0.samtools.cram");
final IOPath referencePath = new HtsPath(TEST_DIR + "reference/human_g1k_v37.20.21.fasta.gz");
final IOPath tempCRAM31Path = IOUtils.createTempPath("htsCRAMCodecTemporary", FileExtensions.CRAM);

final ReadsDecoderOptions readsDecoderOptions =
new ReadsDecoderOptions().setCRAMDecoderOptions(
new CRAMDecoderOptions().setReferencePath(referencePath));
final ReadsEncoderOptions readsEncoderOptions =
new ReadsEncoderOptions()
.setPreSorted(true)
.setCRAMEncoderOptions(new CRAMEncoderOptions().setReferencePath(referencePath));

try (final CRAMDecoder cramDecoder = (CRAMDecoder)
HtsDefaultRegistry.getReadsResolver().getReadsDecoder(sourceCRAMPath, readsDecoderOptions);
final CRAMEncoder cram31Encoder = (CRAMEncoder)
HtsDefaultRegistry.getReadsResolver().getReadsEncoder(tempCRAM31Path, readsEncoderOptions)) {

Assert.assertNotNull(cramDecoder);
Assert.assertEquals(cramDecoder.getFileFormat(), ReadsFormats.CRAM);
Assert.assertTrue(cramDecoder.getDisplayName().contains(sourceCRAMPath.toString()));

Assert.assertNotNull(cram31Encoder);
Assert.assertEquals(cram31Encoder.getFileFormat(), ReadsFormats.CRAM);
Assert.assertTrue(cram31Encoder.getDisplayName().contains(tempCRAM31Path.toString()));

final SAMFileHeader samFileHeader = cramDecoder.getHeader();
cram31Encoder.setHeader(samFileHeader);
for (final SAMRecord samRec : cramDecoder) {
cram31Encoder.write(samRec);
}
}

// make sure we got a CRAM 3.1 file
Assert.assertEquals(CRAM31Tests.getCRAMVersion(tempCRAM31Path), CramVersions.CRAM_v3_1);

final List<SAMRecord> recs30 = new ArrayList<>();
final List<SAMRecord> recs31 = new ArrayList<>();

try (final CRAMDecoder cram30Decoder = (CRAMDecoder)
HtsDefaultRegistry.getReadsResolver().getReadsDecoder(sourceCRAMPath, readsDecoderOptions);
final CRAMDecoder cram31Decoder = (CRAMDecoder)
HtsDefaultRegistry.getReadsResolver().getReadsDecoder(tempCRAM31Path, readsDecoderOptions)) {
final Iterator<SAMRecord> it31 = cram31Decoder.iterator();
for (final SAMRecord sam30Rec : cram30Decoder) {
final SAMRecord sam31Rec = it31.next();
recs30.add(sam30Rec);
recs31.add(sam31Rec);
Assert.assertEquals(sam30Rec, sam31Rec);
}
}
}

}