@@ -554,36 +554,33 @@ fn filter_boolean(array: &BooleanArray, predicate: &FilterPredicate) -> BooleanA
554
554
fn filter_native < T : ArrowNativeType > ( values : & [ T ] , predicate : & FilterPredicate ) -> Buffer {
555
555
assert ! ( values. len( ) >= predicate. filter. len( ) ) ;
556
556
557
- let buffer = match & predicate. strategy {
557
+ match & predicate. strategy {
558
558
IterationStrategy :: SlicesIterator => {
559
- let mut buffer = MutableBuffer :: with_capacity ( predicate. count * T :: get_byte_width ( ) ) ;
559
+ let mut buffer = Vec :: with_capacity ( predicate. count ) ;
560
560
for ( start, end) in SlicesIterator :: new ( & predicate. filter ) {
561
561
buffer. extend_from_slice ( & values[ start..end] ) ;
562
562
}
563
- buffer
563
+ buffer. into ( )
564
564
}
565
565
IterationStrategy :: Slices ( slices) => {
566
- let mut buffer = MutableBuffer :: with_capacity ( predicate. count * T :: get_byte_width ( ) ) ;
566
+ let mut buffer = Vec :: with_capacity ( predicate. count ) ;
567
567
for ( start, end) in slices {
568
568
buffer. extend_from_slice ( & values[ * start..* end] ) ;
569
569
}
570
- buffer
570
+ buffer. into ( )
571
571
}
572
572
IterationStrategy :: IndexIterator => {
573
573
let iter = IndexIterator :: new ( & predicate. filter , predicate. count ) . map ( |x| values[ x] ) ;
574
574
575
575
// SAFETY: IndexIterator is trusted length
576
- unsafe { MutableBuffer :: from_trusted_len_iter ( iter) }
576
+ unsafe { MutableBuffer :: from_trusted_len_iter ( iter) } . into ( )
577
577
}
578
578
IterationStrategy :: Indices ( indices) => {
579
579
let iter = indices. iter ( ) . map ( |x| values[ * x] ) ;
580
- // SAFETY: `Vec::iter` is trusted length
581
- unsafe { MutableBuffer :: from_trusted_len_iter ( iter) }
580
+ iter. collect :: < Vec < _ > > ( ) . into ( )
582
581
}
583
582
IterationStrategy :: All | IterationStrategy :: None => unreachable ! ( ) ,
584
- } ;
585
-
586
- buffer. into ( )
583
+ }
587
584
}
588
585
589
586
/// `filter` implementation for primitive arrays
@@ -656,29 +653,46 @@ where
656
653
( start, end, len)
657
654
}
658
655
659
- /// Extends the in-progress array by the indexes in the provided iterator
660
- fn extend_idx ( & mut self , iter : impl Iterator < Item = usize > ) {
656
+ fn extend_offsets_idx ( & mut self , iter : impl Iterator < Item = usize > ) {
661
657
self . dst_offsets . extend ( iter. map ( |idx| {
662
658
let start = self . src_offsets [ idx] . as_usize ( ) ;
663
659
let end = self . src_offsets [ idx + 1 ] . as_usize ( ) ;
664
660
let len = OffsetSize :: from_usize ( end - start) . expect ( "illegal offset range" ) ;
665
661
self . cur_offset += len;
666
- self . dst_values
667
- . extend_from_slice ( & self . src_values [ start..end] ) ;
662
+
668
663
self . cur_offset
669
664
} ) ) ;
670
665
}
671
666
672
- /// Extends the in-progress array by the ranges in the provided iterator
673
- fn extend_slices ( & mut self , iter : impl Iterator < Item = ( usize , usize ) > ) {
667
+ /// Extends the in-progress array by the indexes in the provided iterator
668
+ fn extend_idx ( & mut self , iter : impl Iterator < Item = usize > ) {
669
+ self . dst_values . reserve_exact ( self . cur_offset . as_usize ( ) ) ;
670
+
671
+ for idx in iter {
672
+ let start = self . src_offsets [ idx] . as_usize ( ) ;
673
+ let end = self . src_offsets [ idx + 1 ] . as_usize ( ) ;
674
+ self . dst_values
675
+ . extend_from_slice ( & self . src_values [ start..end] ) ;
676
+ }
677
+ }
678
+
679
+ fn extend_offsets_slices ( & mut self , iter : impl Iterator < Item = ( usize , usize ) > , count : usize ) {
680
+ self . dst_offsets . reserve_exact ( count) ;
674
681
for ( start, end) in iter {
675
682
// These can only fail if `array` contains invalid data
676
683
for idx in start..end {
677
684
let ( _, _, len) = self . get_value_range ( idx) ;
678
685
self . cur_offset += len;
679
- self . dst_offsets . push ( self . cur_offset ) ; // push_unchecked?
686
+ self . dst_offsets . push ( self . cur_offset ) ;
680
687
}
688
+ }
689
+ }
681
690
691
+ /// Extends the in-progress array by the ranges in the provided iterator
692
+ fn extend_slices ( & mut self , iter : impl Iterator < Item = ( usize , usize ) > ) {
693
+ self . dst_values . reserve_exact ( self . cur_offset . as_usize ( ) ) ;
694
+
695
+ for ( start, end) in iter {
682
696
let value_start = self . get_value_offset ( start) ;
683
697
let value_end = self . get_value_offset ( end) ;
684
698
self . dst_values
@@ -699,13 +713,21 @@ where
699
713
700
714
match & predicate. strategy {
701
715
IterationStrategy :: SlicesIterator => {
716
+ filter. extend_offsets_slices ( SlicesIterator :: new ( & predicate. filter ) , predicate. count ) ;
702
717
filter. extend_slices ( SlicesIterator :: new ( & predicate. filter ) )
703
718
}
704
- IterationStrategy :: Slices ( slices) => filter. extend_slices ( slices. iter ( ) . cloned ( ) ) ,
719
+ IterationStrategy :: Slices ( slices) => {
720
+ filter. extend_offsets_slices ( slices. iter ( ) . cloned ( ) , predicate. count ) ;
721
+ filter. extend_slices ( slices. iter ( ) . cloned ( ) )
722
+ }
705
723
IterationStrategy :: IndexIterator => {
724
+ filter. extend_offsets_idx ( IndexIterator :: new ( & predicate. filter , predicate. count ) ) ;
706
725
filter. extend_idx ( IndexIterator :: new ( & predicate. filter , predicate. count ) )
707
726
}
708
- IterationStrategy :: Indices ( indices) => filter. extend_idx ( indices. iter ( ) . cloned ( ) ) ,
727
+ IterationStrategy :: Indices ( indices) => {
728
+ filter. extend_offsets_idx ( indices. iter ( ) . cloned ( ) ) ;
729
+ filter. extend_idx ( indices. iter ( ) . cloned ( ) )
730
+ }
709
731
IterationStrategy :: All | IterationStrategy :: None => unreachable ! ( ) ,
710
732
}
711
733
0 commit comments