Skip to content

Narek/weighted multivector search #309

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Apr 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 7 additions & 6 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 3.3)

set(LANTERN_VERSION 0.2.2)
set(LANTERN_VERSION 0.2.3)

project(
LanternDB
Expand Down Expand Up @@ -110,7 +110,7 @@ if (BUILD_C_TESTS)
target_link_directories(lantern_c_tests PRIVATE ${PostgreSQL_LIBRARY_DIRS})
# Link libpq
target_link_libraries(lantern_c_tests "-lpq")

add_custom_target(
test-client
COMMAND ${CMAKE_SOURCE_DIR}/scripts/run_all_tests.sh --client
Expand Down Expand Up @@ -251,9 +251,9 @@ set(_script_file "lantern--${RELEASE_ID}.sql")
# ============== Use clang compiler to emit llvm bytecode =================
find_program(LLVM_LTO NAMES llvm-lto)
if(
NOT LLVM_LTO STREQUAL "LLVM_LTO-NOTFOUND"
AND PostgreSQL_WITH_LLVM
AND CMAKE_C_COMPILER_ID MATCHES "Clang"
NOT LLVM_LTO STREQUAL "LLVM_LTO-NOTFOUND"
AND PostgreSQL_WITH_LLVM
AND CMAKE_C_COMPILER_ID MATCHES "Clang"
AND CMAKE_CXX_COMPILER_ID MATCHES "Clang"
)
target_link_options(lantern PRIVATE -flto)
Expand All @@ -270,6 +270,7 @@ endif()
set (_update_files
sql/updates/0.2.0--0.2.1.sql
sql/updates/0.2.1--0.2.2.sql
sql/updates/0.2.2--0.2.3.sql
)

# Generate version information for the binary
Expand Down Expand Up @@ -400,7 +401,7 @@ if (CLANG_FORMAT)
# Add format check target
add_custom_target(
format_check
COMMAND ${CLANG_FORMAT} --dry-run -Werror ${CLANG_FORMATTABLE_FILES}
COMMAND ${CLANG_FORMAT} --dry-run -Werror ${CLANG_FORMATTABLE_FILES}
COMMENT "Checking code formatting with clang-format"
VERBATIM
)
Expand Down
1 change: 1 addition & 0 deletions ci/scripts/build-linux.sh
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ function install_platform_specific_dependencies() {
fi

pushd pg_cron
git fetch
git checkout ${PG_CRON_COMMIT_SHA}
make -j && make install
popd
Expand Down
4 changes: 4 additions & 0 deletions ci/scripts/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ function install_external_dependencies() {
rm -rf pgvector || true
mv pgvector-${PGVECTOR_VERSION} pgvector
pushd pgvector
# Set max ef_search to 50000
# .bak trick is needed to make this work on both mac and linux
# https://stackoverflow.com/questions/5694228/sed-in-place-flag-that-works-both-on-mac-bsd-and-linux
sed -i.bak "s/#define HNSW_MAX_EF_SEARCH.*/#define HNSW_MAX_EF_SEARCH 50000/g" src/hnsw.h
make -j && make install
popd

Expand Down
10 changes: 5 additions & 5 deletions scripts/run_all_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,15 @@ mkdir -p $TMP_OUTDIR
if [ ! -d "$TMP_ROOT/vector_datasets" ]
then
if ! command -v curl &> /dev/null; then
echo "ERROR: The binary curl is required for running tests to download necessary vector test files"
exit 1
echo "ERROR: curl utility is required for running tests to download necessary vector test files"
exit 1
fi
mkdir -p $TMP_ROOT/vector_datasets
echo "Downloading necessary vector files..."
pushd $TMP_ROOT/vector_datasets
curl -sSo sift_base1k.csv https://storage.googleapis.com/lanterndb/sift_base1k.csv
curl -sSo sift_base1k.csv https://storage.googleapis.com/lanterndata/siftsmall/sift_base1k.csv
curl -sSo siftsmall_base.csv https://storage.googleapis.com/lanterndata/siftsmall/siftsmall_base.csv
curl -sSo tsv_wiki_sample.csv https://storage.googleapis.com/lanterndb/tsv_wiki_sample.csv
curl -sSo tsv_wiki_sample.csv https://storage.googleapis.com/lanterndata/wiki/tsv_wiki_sample.csv
curl -sSo views_vec10k.csv https://storage.googleapis.com/lanterndata/random_multicolumn/views_vec10k.csv
# Convert vector to arrays to be used with real[] type
cat sift_base1k.csv | sed -e 's/\[/{/g' | sed -e 's/\]/}/g' > sift_base1k_arrays.csv
Expand Down Expand Up @@ -121,7 +121,7 @@ fi

if [[ -n "$FILTER" || -n "$EXCLUDE" ]]; then
if [ "$PARALLEL" -eq 1 ]; then
TEST_FILES=$(cat $SCHEDULE | grep -E '^(test:|test_begin:|test_end:)' | sed -E -e 's/^test_begin:|test_end:/test:/' | tr " " "\n" | sed -e '/^$/d')
TEST_FILES=$(cat $SCHEDULE | grep -E '^(test:|test_begin:|test_end:)' | sed -E -e 's/^test_begin:|test_end:/test:/' | tr " " "\n" | sed -e '/^$/d')

# begin.sql isn't really optional. There may be cases where we want to drop it, but users should probably have to be very explicit about this
INCLUDE_BEGIN=1
Expand Down
Loading
Loading