Skip to content

Commit b078747

Browse files
committed
Merge branch 'main' of github.com:Groennbeck/datafusion-comet into array-size
2 parents afc926d + a1e6a39 commit b078747

File tree

1,796 files changed

+277326
-33635
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,796 files changed

+277326
-33635
lines changed

.github/actions/java-test/action.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -67,4 +67,4 @@ runs:
6767

6868
- name: Upload coverage results
6969
if: ${{ inputs.upload-test-reports == 'true' }}
70-
uses: codecov/codecov-action@v3 # uses v3 as it allows tokenless uploading
70+
uses: codecov/codecov-action@v5

.github/workflows/pr_build.yml

+1-2
Original file line numberDiff line numberDiff line change
@@ -211,8 +211,7 @@ jobs:
211211
uses: ./.github/actions/java-test
212212
with:
213213
maven_opts: -Pspark-${{ matrix.spark-version }}
214-
# https://github.com/codecov/codecov-action/issues/1549
215-
# upload-test-reports: true
214+
upload-test-reports: true
216215

217216
macos-aarch64-test-with-spark4_0:
218217
strategy:

.github/workflows/spark_sql_test.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ jobs:
7171
with:
7272
spark-version: ${{ matrix.spark-version.full }}
7373
spark-short-version: ${{ matrix.spark-version.short }}
74-
comet-version: '0.5.0-SNAPSHOT' # TODO: get this from pom.xml
74+
comet-version: '0.6.0-SNAPSHOT' # TODO: get this from pom.xml
7575
- name: Run Spark tests
7676
run: |
7777
cd apache-spark

.github/workflows/spark_sql_test_ansi.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ jobs:
6969
with:
7070
spark-version: ${{ matrix.spark-version.full }}
7171
spark-short-version: ${{ matrix.spark-version.short }}
72-
comet-version: '0.5.0-SNAPSHOT' # TODO: get this from pom.xml
72+
comet-version: '0.6.0-SNAPSHOT' # TODO: get this from pom.xml
7373
- name: Run Spark tests
7474
run: |
7575
cd apache-spark

README.md

+6-13
Original file line numberDiff line numberDiff line change
@@ -46,30 +46,23 @@ The following chart shows the time it takes to run the 22 TPC-H queries against
4646
using a single executor with 8 cores. See the [Comet Benchmarking Guide](https://datafusion.apache.org/comet/contributor-guide/benchmarking.html)
4747
for details of the environment used for these benchmarks.
4848

49-
When using Comet, the overall run time is reduced from 615 seconds to 364 seconds, a 1.7x speedup, with query 1
50-
running 9x faster than Spark.
49+
When using Comet, the overall run time is reduced from 640 seconds to 331 seconds, very close to a 2x speedup.
5150

52-
Running the same queries with DataFusion standalone (without Spark) using the same number of cores results in a 3.6x
53-
speedup compared to Spark.
51+
![](docs/source/_static/images/benchmark-results/0.5.0/tpch_allqueries.png)
5452

55-
Comet is not yet achieving full DataFusion speeds in all cases, but with future work we aim to provide a 2x-4x speedup
56-
for a broader set of queries.
53+
Here is a breakdown showing relative performance of Spark and Comet for each TPC-H query.
5754

58-
![](docs/source/_static/images/benchmark-results/0.4.0/tpch_allqueries.png)
59-
60-
Here is a breakdown showing relative performance of Spark, Comet, and DataFusion for each TPC-H query.
61-
62-
![](docs/source/_static/images/benchmark-results/0.4.0/tpch_queries_compare.png)
55+
![](docs/source/_static/images/benchmark-results/0.5.0/tpch_queries_compare.png)
6356

6457
The following charts shows how much Comet currently accelerates each query from the benchmark.
6558

6659
### Relative speedup
6760

68-
![](docs/source/_static/images/benchmark-results/0.4.0/tpch_queries_speedup_rel.png)
61+
![](docs/source/_static/images/benchmark-results/0.5.0/tpch_queries_speedup_rel.png)
6962

7063
### Absolute speedup
7164

72-
![](docs/source/_static/images/benchmark-results/0.4.0/tpch_queries_speedup_abs.png)
65+
![](docs/source/_static/images/benchmark-results/0.5.0/tpch_queries_speedup_abs.png)
7366

7467
These benchmarks can be reproduced in any environment using the documentation in the
7568
[Comet Benchmarking Guide](https://datafusion.apache.org/comet/contributor-guide/benchmarking.html). We encourage

common/pom.xml

+1-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ under the License.
2626
<parent>
2727
<groupId>org.apache.datafusion</groupId>
2828
<artifactId>comet-parent-spark${spark.version.short}_${scala.binary.version}</artifactId>
29-
<version>0.5.0-SNAPSHOT</version>
29+
<version>0.6.0-SNAPSHOT</version>
3030
<relativePath>../pom.xml</relativePath>
3131
</parent>
3232

common/src/main/java/org/apache/comet/parquet/AbstractColumnReader.java

+14
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import org.slf4j.LoggerFactory;
2424

2525
import org.apache.parquet.column.ColumnDescriptor;
26+
import org.apache.parquet.schema.Type;
2627
import org.apache.spark.sql.types.DataType;
2728
import org.apache.spark.sql.types.TimestampNTZType$;
2829

@@ -36,6 +37,9 @@ public abstract class AbstractColumnReader implements AutoCloseable {
3637
/** The Spark data type. */
3738
protected final DataType type;
3839

40+
/** The Parquet field type for this column (may be null for legacy constructor callers). */
41+
protected final Type fieldType;
42+
3943
/** Parquet column descriptor. */
4044
protected final ColumnDescriptor descriptor;
4145

@@ -61,13 +65,23 @@ public abstract class AbstractColumnReader implements AutoCloseable {
6165

6266
public AbstractColumnReader(
6367
DataType type,
68+
Type fieldType,
6469
ColumnDescriptor descriptor,
6570
boolean useDecimal128,
6671
boolean useLegacyDateTimestamp) {
6772
this.type = type;
73+
this.fieldType = fieldType;
6874
this.descriptor = descriptor;
6975
this.useDecimal128 = useDecimal128;
7076
this.useLegacyDateTimestamp = useLegacyDateTimestamp;
77+
}
78+
79+
public AbstractColumnReader(
80+
DataType type,
81+
ColumnDescriptor descriptor,
82+
boolean useDecimal128,
83+
boolean useLegacyDateTimestamp) {
84+
this(type, null, descriptor, useDecimal128, useLegacyDateTimestamp);
7185
TypeUtil.checkParquetType(descriptor, type);
7286
}
7387

common/src/main/java/org/apache/comet/parquet/BatchReader.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -272,7 +272,7 @@ public void init() throws URISyntaxException, IOException {
272272
requestedSchema =
273273
CometParquetReadSupport.clipParquetSchema(
274274
requestedSchema, sparkSchema, isCaseSensitive, useFieldId, ignoreMissingIds);
275-
if (requestedSchema.getColumns().size() != sparkSchema.size()) {
275+
if (requestedSchema.getFieldCount() != sparkSchema.size()) {
276276
throw new IllegalArgumentException(
277277
String.format(
278278
"Spark schema has %d columns while " + "Parquet schema has %d columns",

common/src/main/java/org/apache/comet/parquet/Native.java

+52
Original file line numberDiff line numberDiff line change
@@ -234,4 +234,56 @@ public static native void setPageV2(
234234
* @param handle the handle to the native Parquet column reader
235235
*/
236236
public static native void closeColumnReader(long handle);
237+
238+
///////////// Arrow Native Parquet Reader APIs
239+
// TODO: Add partitionValues(?), improve requiredColumns to use a projection mask that corresponds
240+
// to arrow.
241+
// Add batch size, datetimeRebaseModeSpec, metrics(how?)...
242+
243+
/**
244+
* Initialize a record batch reader for a PartitionedFile
245+
*
246+
* @param filePath
247+
* @param start
248+
* @param length
249+
* @return a handle to the record batch reader, used in subsequent calls.
250+
*/
251+
public static native long initRecordBatchReader(
252+
String filePath,
253+
long fileSize,
254+
long start,
255+
long length,
256+
byte[] requiredSchema,
257+
String sessionTimezone);
258+
259+
// arrow native version of read batch
260+
/**
261+
* Read the next batch of data into memory on native side
262+
*
263+
* @param handle
264+
* @return the number of rows read
265+
*/
266+
public static native int readNextRecordBatch(long handle);
267+
268+
// Arrow-native equivalent of currentBatch. 'columnNum' is the index of the column in the record
batch
270+
/**
271+
* Load the column corresponding to columnNum in the currently loaded record batch into JVM
272+
*
273+
* @param handle
274+
* @param columnNum
275+
* @param arrayAddr
276+
* @param schemaAddr
277+
*/
278+
public static native void currentColumnBatch(
279+
long handle, int columnNum, long arrayAddr, long schemaAddr);
280+
281+
// arrow native version to close record batch reader
282+
283+
/**
284+
* Close the record batch reader. Free the resources
285+
*
286+
* @param handle
287+
*/
288+
public static native void closeRecordBatchReader(long handle);
237289
}

0 commit comments

Comments
 (0)