lanterndata · Ngalstyan4 · Apr 9, 2024 · Apr 6, 2024 · Apr 6, 2024 · Apr 9, 2024
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -1,6 +1,6 @@
 cmake_minimum_required(VERSION 3.3)
 
-set(LANTERN_VERSION 0.2.2)
+set(LANTERN_VERSION 0.2.3)
 
 project(
   LanternDB
@@ -110,7 +110,7 @@ if (BUILD_C_TESTS)
   target_link_directories(lantern_c_tests PRIVATE ${PostgreSQL_LIBRARY_DIRS})
   # Link libpq
   target_link_libraries(lantern_c_tests "-lpq")
-  
+
   add_custom_target(
     test-client
     COMMAND ${CMAKE_SOURCE_DIR}/scripts/run_all_tests.sh --client
@@ -251,9 +251,9 @@ set(_script_file "lantern--${RELEASE_ID}.sql")
 # ============== Use clang compiler to emit llvm bytecode =================
 find_program(LLVM_LTO NAMES llvm-lto)
 if(
-  NOT LLVM_LTO STREQUAL "LLVM_LTO-NOTFOUND" 
-  AND PostgreSQL_WITH_LLVM 
-  AND CMAKE_C_COMPILER_ID MATCHES "Clang" 
+  NOT LLVM_LTO STREQUAL "LLVM_LTO-NOTFOUND"
+  AND PostgreSQL_WITH_LLVM
+  AND CMAKE_C_COMPILER_ID MATCHES "Clang"
   AND CMAKE_CXX_COMPILER_ID MATCHES "Clang"
 )
   target_link_options(lantern PRIVATE -flto)
@@ -270,6 +270,7 @@ endif()
 set (_update_files
   sql/updates/0.2.0--0.2.1.sql
   sql/updates/0.2.1--0.2.2.sql
+  sql/updates/0.2.2--0.2.3.sql
 )
 
 # Generate version information for the binary
@@ -400,7 +401,7 @@ if (CLANG_FORMAT)
   # Add format check target
   add_custom_target(
     format_check
-    COMMAND ${CLANG_FORMAT} --dry-run -Werror ${CLANG_FORMATTABLE_FILES} 
+    COMMAND ${CLANG_FORMAT} --dry-run -Werror ${CLANG_FORMATTABLE_FILES}
     COMMENT "Checking code formatting with clang-format"
     VERBATIM
   )

diff --git a/ci/scripts/build-linux.sh b/ci/scripts/build-linux.sh
@@ -47,6 +47,7 @@ function install_platform_specific_dependencies() {
     fi
 
     pushd pg_cron
+      git fetch
       git checkout ${PG_CRON_COMMIT_SHA}
       make -j && make install
     popd

diff --git a/ci/scripts/build.sh b/ci/scripts/build.sh
@@ -46,6 +46,10 @@ function install_external_dependencies() {
     rm -rf pgvector || true
     mv pgvector-${PGVECTOR_VERSION} pgvector
     pushd pgvector
+      # Patch pgvector's HNSW_MAX_EF_SEARCH define in src/hnsw.h to 50000 before building
+      # An explicit backup suffix (-i.bak) keeps in-place sed portable across BSD (macOS) and GNU (Linux) sed
+      # https://stackoverflow.com/questions/5694228/sed-in-place-flag-that-works-both-on-mac-bsd-and-linux
+      sed -i.bak "s/#define HNSW_MAX_EF_SEARCH.*/#define HNSW_MAX_EF_SEARCH 50000/g" src/hnsw.h
       make -j && make install
     popd
 

diff --git a/scripts/run_all_tests.sh b/scripts/run_all_tests.sh
@@ -37,15 +37,15 @@ mkdir -p $TMP_OUTDIR
 if [ ! -d "$TMP_ROOT/vector_datasets" ]
 then
     if ! command -v curl &> /dev/null; then
-	echo "ERROR: The binary curl is required for running tests to download necessary vector test files"
-	exit 1
+        echo "ERROR: curl utility is required for running tests to download necessary vector test files"
+        exit 1
     fi
     mkdir -p $TMP_ROOT/vector_datasets
     echo "Downloading necessary vector files..."
     pushd $TMP_ROOT/vector_datasets
-        curl -sSo sift_base1k.csv https://storage.googleapis.com/lanterndb/sift_base1k.csv
+        curl -sSo sift_base1k.csv https://storage.googleapis.com/lanterndata/siftsmall/sift_base1k.csv
         curl -sSo siftsmall_base.csv https://storage.googleapis.com/lanterndata/siftsmall/siftsmall_base.csv
-        curl -sSo tsv_wiki_sample.csv https://storage.googleapis.com/lanterndb/tsv_wiki_sample.csv
+        curl -sSo tsv_wiki_sample.csv https://storage.googleapis.com/lanterndata/wiki/tsv_wiki_sample.csv
         curl -sSo views_vec10k.csv https://storage.googleapis.com/lanterndata/random_multicolumn/views_vec10k.csv
         # Convert vector to arrays to be used with real[] type
         cat sift_base1k.csv | sed -e 's/\[/{/g' | sed -e 's/\]/}/g' > sift_base1k_arrays.csv
@@ -121,7 +121,7 @@ fi
 
 if [[ -n "$FILTER" || -n "$EXCLUDE" ]]; then
     if [ "$PARALLEL" -eq 1 ]; then
-    	TEST_FILES=$(cat $SCHEDULE | grep -E '^(test:|test_begin:|test_end:)' | sed -E -e 's/^test_begin:|test_end:/test:/' | tr " " "\n" | sed -e '/^$/d')
+        TEST_FILES=$(cat $SCHEDULE | grep -E '^(test:|test_begin:|test_end:)' | sed -E -e 's/^test_begin:|test_end:/test:/' | tr " " "\n" | sed -e '/^$/d')
 
         # begin.sql isn't really optional. There may be cases where we want to drop it, but users should probably have to be very explicit about this
         INCLUDE_BEGIN=1