Skip to content
This repository was archived by the owner on Dec 8, 2022. It is now read-only.

Commit f708af5

Browse files
authored
Merge pull request #136 from seq-lang/develop
Develop
2 parents dd60291 + 124d80a commit f708af5

29 files changed: +1808 / -5587 lines changed

CMakeLists.txt

+10 -15
Original file line number | Diff line number | Diff line change
@@ -20,13 +20,6 @@ option(SEQ_THREADED "compile runtime library for multithreading" OFF)
2020
option(SEQ_JITBRIDGE "support JIT interoperability" OFF)
2121
find_package(ZLIB REQUIRED)
2222

23-
find_library(HTS_LIB NAMES libhts.a libhts)
24-
if(NOT HTS_LIB)
25-
message(FATAL_ERROR "HTSlib not found")
26-
else()
27-
message(STATUS "Found HTSlib: ${HTS_LIB}")
28-
endif()
29-
3023
find_library(GC_LIB NAMES libgc.a libgc)
3124
if(NOT GC_LIB)
3225
message(FATAL_ERROR "GC not found")
@@ -36,15 +29,17 @@ endif()
3629

3730
add_library(seqrt SHARED runtime/lib.h
3831
runtime/lib.cpp
39-
runtime/align.cpp
4032
runtime/exc.cpp
41-
runtime/ksw2/ksw2.h
42-
runtime/ksw2/ksw2_extd2_sse.cpp
43-
runtime/ksw2/ksw2_exts2_sse.cpp
44-
runtime/ksw2/ksw2_extz2_sse.cpp
45-
runtime/ksw2/ksw2_gg2_sse.cpp)
46-
target_link_libraries(seqrt PUBLIC bz2 lzma curl ${ZLIB_LIBRARIES} ${GC_LIB} ${HTS_LIB} Threads::Threads)
47-
set_source_files_properties(runtime/align.cpp PROPERTIES COMPILE_FLAGS "-march=native")
33+
runtime/sw/ksw2.h
34+
runtime/sw/ksw2_extd2_sse.cpp
35+
runtime/sw/ksw2_exts2_sse.cpp
36+
runtime/sw/ksw2_extz2_sse.cpp
37+
runtime/sw/ksw2_gg2_sse.cpp
38+
runtime/sw/intersw.h
39+
runtime/sw/intersw.cpp)
40+
target_include_directories(seqrt PRIVATE runtime)
41+
target_link_libraries(seqrt PUBLIC ${ZLIB_LIBRARIES} ${GC_LIB} Threads::Threads)
42+
set_source_files_properties(runtime/sw/intersw.cpp PROPERTIES COMPILE_FLAGS "-march=native")
4843

4944
if(SEQ_THREADED)
5045
find_package(OpenMP REQUIRED)

compiler/lang/expr.cpp

+36 -17
Original file line number | Diff line number | Diff line change
@@ -1739,10 +1739,9 @@ Value *CallExpr::codegen0(BaseFunc *base, BasicBlock *&block) {
17391739
"", "",
17401740
"right", "generic_sc",
17411741
"approx_max", "approx_drop",
1742-
"ext_only", "rev_cigar",
1742+
"", "",
17431743
"splice", "splice_fwd",
1744-
"splice_rev", "splice_flank",
1745-
"glob"};
1744+
"splice_rev", "splice_flank"};
17461745
auto *baseFunc = dynamic_cast<Func *>(base);
17471746
if (baseFunc && baseFunc->hasAttribute("inter_align")) {
17481747
if (auto *elemExpr = dynamic_cast<GetElemExpr *>(func)) {
@@ -1792,6 +1791,7 @@ Value *CallExpr::codegen0(BaseFunc *base, BasicBlock *&block) {
17921791
paramExprs.ambig = args[3];
17931792
paramExprs.gapo = args[4];
17941793
paramExprs.gape = args[5];
1794+
paramExprs.score_only = args[11];
17951795
paramExprs.bandwidth = args[8];
17961796
paramExprs.zdrop = args[9];
17971797
paramExprs.end_bonus = args[10];
@@ -1809,28 +1809,47 @@ Value *CallExpr::codegen0(BaseFunc *base, BasicBlock *&block) {
18091809
"query for inter-sequence alignment is not of type seq");
18101810
if (!args[0]->getType()->is(types::Seq))
18111811
throw exc::SeqException(
1812-
"reference for inter-sequence alignment is not of type seq");
1812+
"target for inter-sequence alignment is not of type seq");
18131813
Value *query = self->codegen(base, block);
1814-
Value *reference = args[0]->codegen(base, block);
1814+
Value *target = args[0]->codegen(base, block);
18151815
types::RecordType *yieldType = PipeExpr::getInterAlignYieldType();
1816+
types::Type *resultType =
1817+
yieldType->getBaseType(2); // (CIGAR, score)
18161818
Value *yieldVal = yieldType->defaultValue(block);
18171819
yieldVal = yieldType->setMemb(yieldVal, "query", query, block);
1820+
yieldVal = yieldType->setMemb(yieldVal, "target", target, block);
1821+
1822+
// set up flags
1823+
static const int KSW_EZ_EXTZ_ONLY = 0x40;
1824+
static const int KSW_EZ_REV_CIGAR = 0x80;
1825+
IRBuilder<> builder(block);
1826+
Value *flags = builder.getInt32(0);
1827+
Value *b;
1828+
// ext_only
1829+
b = args[16]->codegen(base, block);
1830+
builder.SetInsertPoint(block);
1831+
b = builder.CreateTrunc(b, builder.getInt1Ty());
1832+
b = builder.CreateSelect(b, builder.getInt32(KSW_EZ_EXTZ_ONLY),
1833+
builder.getInt32(0));
1834+
flags = builder.CreateOr(flags, b);
1835+
// rev_cigar
1836+
b = args[17]->codegen(base, block);
1837+
builder.SetInsertPoint(block);
1838+
b = builder.CreateTrunc(b, builder.getInt1Ty());
1839+
b = builder.CreateSelect(b, builder.getInt32(KSW_EZ_REV_CIGAR),
1840+
builder.getInt32(0));
1841+
flags = builder.CreateOr(flags, b);
1842+
1843+
// use the score field of the alignment to send flags
1844+
flags = builder.CreateZExt(flags, seqIntLLVM(block->getContext()));
1845+
Value *resultVal = resultType->defaultValue(block);
1846+
resultVal = resultType->setMemb(resultVal, "_score", flags, block);
18181847
yieldVal =
1819-
yieldType->setMemb(yieldVal, "reference", reference, block);
1848+
yieldType->setMemb(yieldVal, "alignment", resultVal, block);
18201849

18211850
baseFunc->codegenYield(yieldVal, yieldType, block);
18221851
yieldVal = baseFunc->codegenYieldExpr(block, /*suspend=*/false);
1823-
Value *score = yieldType->memb(yieldVal, "score", block);
1824-
1825-
// realign if score < 0
1826-
Func *realignFunc = Func::getBuiltin("_interaln_realign");
1827-
Function *realign = realignFunc->getFunc(block->getModule());
1828-
Value *params = PipeExpr::validateAndCodegenInterAlignParams(
1829-
paramExprs, base, block);
1830-
1831-
IRBuilder<> builder(block);
1832-
Value *alignment =
1833-
builder.CreateCall(realign, {query, reference, score, params});
1852+
Value *alignment = yieldType->memb(yieldVal, "alignment", block);
18341853
return alignment;
18351854
}
18361855
}

compiler/lang/patterns.cpp

+4 -4
Original file line number | Diff line number | Diff line change
@@ -117,7 +117,7 @@ Value *StrPattern::codegen(BaseFunc *base, types::Type *type, Value *val,
117117
LLVMContext &context = block->getContext();
118118
Value *pat = StrExpr(this->val).codegen(base, block);
119119
Value *b =
120-
types::Str->callMagic("__eq__", {type}, pat, {val}, block, getTryCatch());
120+
types::Str->callMagic("__eq__", {type}, pat, {val}, block, nullptr);
121121
IRBuilder<> builder(block);
122122
return builder.CreateTrunc(b, IntegerType::getInt1Ty(context));
123123
}
@@ -243,7 +243,7 @@ Value *ArrayPattern::codegen(BaseFunc *base, types::Type *type, Value *val,
243243
for (unsigned i = 0; i < star; i++) {
244244
Value *idx = ConstantInt::get(seqIntLLVM(context), i);
245245
Value *sub = type->callMagic("__getitem__", {types::Int}, val, {idx},
246-
block, getTryCatch());
246+
block, nullptr);
247247
Value *subRes = patterns[i]->codegen(
248248
base, type->magicOut("__getitem__", {types::Int}), sub, block);
249249
builder.SetInsertPoint(
@@ -258,7 +258,7 @@ Value *ArrayPattern::codegen(BaseFunc *base, types::Type *type, Value *val,
258258
idx, ConstantInt::get(seqIntLLVM(context), patterns.size()));
259259

260260
Value *sub = type->callMagic("__getitem__", {types::Int}, val, {idx},
261-
block, getTryCatch());
261+
block, nullptr);
262262
Value *subRes = patterns[i]->codegen(
263263
base, type->magicOut("__getitem__", {types::Int}), sub, block);
264264
builder.SetInsertPoint(
@@ -269,7 +269,7 @@ Value *ArrayPattern::codegen(BaseFunc *base, types::Type *type, Value *val,
269269
for (unsigned i = 0; i < patterns.size(); i++) {
270270
Value *idx = ConstantInt::get(seqIntLLVM(context), i);
271271
Value *sub = type->callMagic("__getitem__", {types::Int}, val, {idx},
272-
block, getTryCatch());
272+
block, nullptr);
273273
Value *subRes = patterns[i]->codegen(
274274
base, type->magicOut("__getitem__", {types::Int}), sub, block);
275275
builder.SetInsertPoint(

compiler/lang/pipeline.cpp

+40 -30
Original file line number | Diff line number | Diff line change
@@ -227,28 +227,29 @@ static void applyCanonicalKmerOptimization(std::vector<Expr *> &stages,
227227

228228
// make sure params are globals or literals, since codegen'ing in function entry
229229
// block
230-
static Value *validateAndCodegenInterAlignParamExpr(Expr *e,
231-
const std::string &name,
232-
BaseFunc *base,
233-
BasicBlock *block) {
230+
template <typename E = IntExpr>
231+
static Value *validateAndCodegenInterAlignParamExpr(
232+
Expr *e, const std::string &name, BaseFunc *base, BasicBlock *block,
233+
bool i32 = false, types::Type *expectedType = types::Int) {
234234
if (!e)
235235
throw exc::SeqException("inter-sequence alignment parameter '" + name +
236236
"' not specified");
237-
if (!e->getType()->is(types::Int))
237+
if (!e->getType()->is(expectedType))
238238
throw exc::SeqException("inter-sequence alignment parameter '" + name +
239-
"' is not of type int");
239+
"' is not of type " + expectedType->getName());
240240
bool valid = false;
241241
if (auto *v = dynamic_cast<VarExpr *>(e)) {
242242
valid = v->getVar()->isGlobal();
243243
} else {
244-
valid = (dynamic_cast<IntExpr *>(e) != nullptr);
244+
valid = (dynamic_cast<E *>(e) != nullptr);
245245
}
246246
if (!valid)
247247
throw exc::SeqException("inter-sequence alignment parameters must be "
248248
"constants or global variables");
249249
Value *val = e->codegen(base, block);
250250
IRBuilder<> builder(block);
251-
return builder.CreateTrunc(val, builder.getInt8Ty());
251+
return builder.CreateZExtOrTrunc(val, i32 ? builder.getInt32Ty()
252+
: builder.getInt8Ty());
252253
}
253254

254255
Value *PipeExpr::validateAndCodegenInterAlignParams(
@@ -271,14 +272,18 @@ Value *PipeExpr::validateAndCodegenInterAlignParams(
271272
paramVal = validateAndCodegenInterAlignParamExpr(paramExprs.gape, "gape",
272273
base, block);
273274
params = paramsType->setMemb(params, "gape", paramVal, block);
274-
paramVal = validateAndCodegenInterAlignParamExpr(paramExprs.bandwidth,
275-
"bandwidth", base, block);
275+
paramVal = validateAndCodegenInterAlignParamExpr<BoolExpr>(
276+
paramExprs.score_only, "score_only", base, block, /*i32=*/false,
277+
/*expectedType=*/types::Bool);
278+
params = paramsType->setMemb(params, "score_only", paramVal, block);
279+
paramVal = validateAndCodegenInterAlignParamExpr(
280+
paramExprs.bandwidth, "bandwidth", base, block, /*i32=*/true);
276281
params = paramsType->setMemb(params, "bandwidth", paramVal, block);
277282
paramVal = validateAndCodegenInterAlignParamExpr(paramExprs.zdrop, "zdrop",
278-
base, block);
283+
base, block, /*i32=*/true);
279284
params = paramsType->setMemb(params, "zdrop", paramVal, block);
280-
paramVal = validateAndCodegenInterAlignParamExpr(paramExprs.zdrop,
281-
"end_bonus", base, block);
285+
paramVal = validateAndCodegenInterAlignParamExpr(
286+
paramExprs.end_bonus, "end_bonus", base, block, /*i32=*/true);
282287
params = paramsType->setMemb(params, "end_bonus", paramVal, block);
283288
return params;
284289
}
@@ -461,8 +466,7 @@ Value *PipeExpr::codegenPipe(BaseFunc *base,
461466
}
462467

463468
// following defs are from bio/align.seq
464-
const int MAX_SEQ_LEN_REF = 256;
465-
const int MAX_SEQ_LEN_QER = 128;
469+
const int LEN_LIMIT = 512;
466470
const int MAX_SEQ_LEN8 = 128;
467471
const int MAX_SEQ_LEN16 = 32768;
468472
types::RecordType *pairType = PipeExpr::getInterAlignSeqPairType();
@@ -483,8 +487,8 @@ Value *PipeExpr::codegenPipe(BaseFunc *base,
483487
IRBuilder<> builder(preamble);
484488
const unsigned W = PipeExpr::SCHED_WIDTH_INTERALIGN;
485489
Value *statesSize = builder.getInt64(genType->size(module) * W);
486-
Value *bufRefSize = builder.getInt64(MAX_SEQ_LEN_REF * W);
487-
Value *bufQerSize = builder.getInt64(MAX_SEQ_LEN_QER * W);
490+
Value *bufRefSize = builder.getInt64(LEN_LIMIT * W);
491+
Value *bufQerSize = builder.getInt64(LEN_LIMIT * W);
488492
Value *pairsSize = builder.getInt64(pairType->size(module) * W);
489493
Value *histSize = builder.getInt64((MAX_SEQ_LEN8 + MAX_SEQ_LEN16 + 32) * 4);
490494
Value *states = builder.CreateCall(alloc, statesSize);
@@ -495,10 +499,10 @@ Value *PipeExpr::codegenPipe(BaseFunc *base,
495499
statesTemp, genType->getLLVMType(context)->getPointerTo());
496500
Value *bufRef = builder.CreateCall(allocAtomic, bufRefSize);
497501
Value *bufQer = builder.CreateCall(allocAtomic, bufQerSize);
498-
Value *pairs = builder.CreateCall(allocAtomic, pairsSize);
502+
Value *pairs = builder.CreateCall(alloc, pairsSize);
499503
pairs = builder.CreateBitCast(
500504
pairs, pairType->getLLVMType(context)->getPointerTo());
501-
Value *pairsTemp = builder.CreateCall(allocAtomic, pairsSize);
505+
Value *pairsTemp = builder.CreateCall(alloc, pairsSize);
502506
pairsTemp = builder.CreateBitCast(
503507
pairsTemp, pairType->getLLVMType(context)->getPointerTo());
504508
Value *hist = builder.CreateCall(allocAtomic, histSize);
@@ -537,7 +541,8 @@ Value *PipeExpr::codegenPipe(BaseFunc *base,
537541

538542
builder.SetInsertPoint(notFull);
539543
N = builder.CreateLoad(filled);
540-
N = builder.CreateCall(queue, {task, pairs, bufRef, bufQer, states, N});
544+
N = builder.CreateCall(queue,
545+
{task, pairs, bufRef, bufQer, states, N, params});
541546
builder.CreateStore(N, filled);
542547
builder.CreateBr(exit);
543548
state.block = exit;
@@ -887,24 +892,29 @@ PipeExpr *PipeExpr::clone(Generic *ref) {
887892
}
888893

889894
types::RecordType *PipeExpr::getInterAlignYieldType() {
890-
return types::RecordType::get({types::Seq, types::Seq, types::Int},
891-
{"query", "reference", "score"},
895+
auto *i32 = types::IntNType::get(32, true);
896+
static types::RecordType *cigarType = types::RecordType::get(
897+
{types::PtrType::get(i32), types::Int}, {"_data", "_len"}, "CIGAR");
898+
static types::RecordType *resultType = types::RecordType::get(
899+
{cigarType, types::Int}, {"_cigar", "_score"}, "Alignment");
900+
return types::RecordType::get({types::Seq, types::Seq, resultType},
901+
{"query", "target", "alignment"},
892902
"InterAlignYield");
893903
}
894904

895905
types::RecordType *PipeExpr::getInterAlignParamsType() {
896906
auto *i8 = types::IntNType::get(8, true);
897-
return types::RecordType::get(
898-
{i8, i8, i8, i8, i8, i8, i8, i8},
899-
{"a", "b", "ambig", "gapo", "gape", "bandwidth", "zdrop", "end_bonus"},
900-
"InterAlignParams");
907+
auto *i32 = types::IntNType::get(32, true);
908+
return types::RecordType::get({i8, i8, i8, i8, i8, i8, i32, i32, i32},
909+
{"a", "b", "ambig", "gapo", "gape",
910+
"score_only", "bandwidth", "zdrop",
911+
"end_bonus"},
912+
"InterAlignParams");
901913
}
902914

903915
types::RecordType *PipeExpr::getInterAlignSeqPairType() {
904916
auto *i32 = types::IntNType::get(32, true);
905917
return types::RecordType::get(
906-
{i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32},
907-
{"idr", "idq", "id", "len1", "len2", "h0", "seqid", "regid", "score",
908-
"tle", "gtle", "qle", "gscore", "max_off"},
909-
"SeqPair");
918+
{i32, i32, i32, i32, types::PtrType::get(i32), i32, i32},
919+
{"id", "len1", "len2", "score", "cigar", "n_cigar", "flags"}, "SeqPair");
910920
}

compiler/lang/seq.h

+1 -1
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,7 @@
3535

3636
#define SEQ_VERSION_MAJOR 0
3737
#define SEQ_VERSION_MINOR 9
38-
#define SEQ_VERSION_PATCH 6
38+
#define SEQ_VERSION_PATCH 7
3939

4040
namespace seq {
4141
namespace config {

compiler/parser/ocaml.cpp

+1 -1
Original file line number | Diff line number | Diff line change
@@ -399,7 +399,7 @@ unique_ptr<SuiteStmt> ocaml_parse(string file, string code, int line_offset,
399399
CAMLlocal3(p1, f, c);
400400
static value *closure_f = nullptr;
401401
if (!closure_f) {
402-
closure_f = caml_named_value("menhir_parse");
402+
closure_f = (value *)caml_named_value("menhir_parse");
403403
}
404404
f = caml_copy_string(file.c_str());
405405
c = caml_copy_string(code.c_str());

compiler/types/func.cpp

+1 -1
Original file line number | Diff line number | Diff line change
@@ -328,6 +328,7 @@ void types::GenType::initOps() {
328328
func = cast<Function>(module->getOrInsertFunction(
329329
name, outType->getLLVMType(context), getLLVMType(context)));
330330
func->setLinkage(GlobalValue::PrivateLinkage);
331+
func->setDoesNotThrow();
331332
func->setPersonalityFn(makePersonalityFunc(module));
332333
func->addFnAttr(Attribute::AlwaysInline);
333334

@@ -355,7 +356,6 @@ void types::GenType::initOps() {
355356
func = cast<Function>(module->getOrInsertFunction(
356357
name, Bool->getLLVMType(context), getLLVMType(context)));
357358
func->setLinkage(GlobalValue::PrivateLinkage);
358-
func->setDoesNotThrow();
359359
func->addFnAttr(Attribute::AlwaysInline);
360360

361361
Value *arg = func->arg_begin();

compiler/types/func.h

+4 -2
Original file line number | Diff line number | Diff line change
@@ -43,10 +43,12 @@ class GenType : public Type {
4343
public:
4444
enum GenTypeKind { NORMAL, PREFETCH, INTERALIGN };
4545
struct InterAlignParams { // see bio/align.seq for definition
46-
Expr *a, *b, *ambig, *gapo, *gape, *bandwidth, *zdrop, *end_bonus;
46+
Expr *a, *b, *ambig, *gapo, *gape, *score_only, *bandwidth, *zdrop,
47+
*end_bonus;
4748
InterAlignParams()
4849
: a(nullptr), b(nullptr), ambig(nullptr), gapo(nullptr), gape(nullptr),
49-
bandwidth(nullptr), zdrop(nullptr), end_bonus(nullptr) {}
50+
score_only(nullptr), bandwidth(nullptr), zdrop(nullptr),
51+
end_bonus(nullptr) {}
5052
};
5153

5254
private:

docs/sphinx/conf.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,7 @@ def setup(sphinx):
3030
# The short X.Y version
3131
version = u'0.9'
3232
# The full version, including alpha/beta/rc tags
33-
release = u'0.9.6'
33+
release = u'0.9.7'
3434

3535
# Logo path
3636
html_logo = '../images/logo.png'

docs/sphinx/tutorial.rst

+2 -3
Original file line number | Diff line number | Diff line change
@@ -270,7 +270,7 @@ Aligning sequences is very straightforward in Seq, and supports numerous options
270270
aln = s1.align(s2, a=2, b=4, gapo=4, gape=2, gapo2=13, gape2=1)
271271
print aln.cigar, aln.score
272272
273-
Here is the list of options supported by the ``align()`` method; all are optional:
273+
Here is the list of options supported by the ``align()`` method; all are optional (default is global alignment):
274274

275275
- ``a``: match score
276276
- ``b``: mismatch score
@@ -286,9 +286,8 @@ Here is the list of options supported by the ``align()`` method; all are optiona
286286
- ``approx_max``: if true, approximate max
287287
- ``approx_drop``: if true, approximate Z-drop
288288
- ``rev_cigar``: if true, reverse CIGAR in output
289-
- ``ext_only``: if true, only perform extension
289+
- ``ext_only``: if true, perform extension alignment
290290
- ``splice``: if true, perform spliced alignment
291-
- ``glob``: if true, perform global alignment
292291

293292
Note that all costs/scores are positive by convention.
294293

0 commit comments

Comments
 (0)