diff --git a/sql/core/benchmarks/CSVBenchmark-results.txt b/sql/core/benchmarks/CSVBenchmark-results.txt index 867cb4ac59f12..fd8e3e334bbdc 100644 --- a/sql/core/benchmarks/CSVBenchmark-results.txt +++ b/sql/core/benchmarks/CSVBenchmark-results.txt @@ -2,76 +2,81 @@ Benchmark to measure CSV read/write performance ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Mac OS X 26.5 +Apple M3 Pro Parsing quoted values: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -One quoted string 26170 26230 94 0.0 523394.1 1.0X +One quoted string 10877 10913 57 0.0 217531.6 1.0X -OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Mac OS X 26.5 +Apple M3 Pro Wide rows with 1000 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Select 1000 columns 51860 52209 580 0.0 51859.6 1.0X -Select 100 columns 23745 23781 43 0.0 23745.3 2.2X -Select one column 20220 20278 56 0.0 20219.6 2.6X -count() 3218 3308 105 0.3 3218.2 16.1X -Select 100 columns, one bad input field 28039 28266 212 0.0 28039.4 1.8X -Select 100 columns, corrupt record field 31122 31132 17 0.0 31122.3 1.7X +Select 1000 columns 41330 42137 916 0.0 41330.3 1.0X +Select 100 columns 15231 15390 189 0.1 15231.0 2.7X +Select one column 12603 12667 61 0.1 12603.2 3.3X +count() 2610 2630 28 0.4 2610.3 15.8X +Select 100 columns, one bad input field 17949 18138 202 0.1 17949.1 2.3X +Select 100 columns, corrupt record field 20239 20372 126 0.0 
20239.1 2.0X -OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Mac OS X 26.5 +Apple M3 Pro Count a dataset with 10 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Select 10 columns + count() 9648 9682 35 1.0 964.8 1.0X -Select 1 column + count() 6694 6706 16 1.5 669.4 1.4X -count() 1548 1560 19 6.5 154.8 6.2X +Select 10 columns + count() 6079 6168 80 1.6 607.9 1.0X +Select 1 column + count() 3674 3760 112 2.7 367.4 1.7X +count() 870 882 17 11.5 87.0 7.0X -OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Mac OS X 26.5 +Apple M3 Pro Write dates and timestamps: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Create a dataset of timestamps 834 845 16 12.0 83.4 1.0X -to_csv(timestamp) 5794 5808 21 1.7 579.4 0.1X -write timestamps to files 6073 6082 11 1.6 607.3 0.1X -Create a dataset of dates 959 968 12 10.4 95.9 0.9X -to_csv(date) 3980 3987 6 2.5 398.0 0.2X -write dates to files 3894 3899 5 2.6 389.4 0.2X +Create a dataset of timestamps 712 755 38 14.0 71.2 1.0X +to_csv(timestamp) 4106 4176 66 2.4 410.6 0.2X +write timestamps to files 4352 4365 13 2.3 435.2 0.2X +Create a dataset of dates 841 846 7 11.9 84.1 0.8X +to_csv(date) 2660 2674 19 3.8 266.0 0.3X +write dates to files 2942 3003 80 3.4 294.2 0.2X +Create a dataset of times 771 789 28 13.0 77.1 0.9X +to_csv(time) 3086 3130 47 3.2 308.6 0.2X +write times to files 3271 3390 119 3.1 327.1 0.2X -OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 
64-Bit Server VM 17.0.18+8-LTS on Mac OS X 26.5 +Apple M3 Pro Read dates and timestamps: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------------------------- -read timestamp text from files 1180 1186 4 8.5 118.0 1.0X -read timestamps from files 9655 9670 19 1.0 965.5 0.1X -infer timestamps from files 19167 19244 68 0.5 1916.7 0.1X -read date text from files 1111 1129 22 9.0 111.1 1.1X -read date from files 9513 9521 7 1.1 951.3 0.1X -infer date from files 19126 19159 31 0.5 1912.6 0.1X -timestamp strings 1137 1144 7 8.8 113.7 1.0X -parse timestamps from Dataset[String] 10759 10774 22 0.9 1075.9 0.1X -infer timestamps from Dataset[String] 19823 19835 13 0.5 1982.3 0.1X -date strings 1579 1583 5 6.3 157.9 0.7X -parse dates from Dataset[String] 11033 11055 22 0.9 1103.3 0.1X -from_csv(timestamp) 8860 8864 6 1.1 886.0 0.1X -from_csv(date) 9649 9670 27 1.0 964.9 0.1X -infer error timestamps from Dataset[String] with default format 11156 11157 1 0.9 1115.6 0.1X -infer error timestamps from Dataset[String] with user-provided format 11118 11147 26 0.9 1111.8 0.1X -infer error timestamps from Dataset[String] with legacy format 11140 11152 10 0.9 1114.0 0.1X +read timestamp text from files 659 665 5 15.2 65.9 1.0X +read timestamps from files 7524 7566 66 1.3 752.4 0.1X +infer timestamps from files 14004 14125 123 0.7 1400.4 0.0X +read date text from files 551 554 2 18.1 55.1 1.2X +read date from files 5445 5496 47 1.8 544.5 0.1X +infer date from files 10910 10918 8 0.9 1091.0 0.1X +timestamp strings 762 775 18 13.1 76.2 0.9X +parse timestamps from Dataset[String] 5936 6036 89 1.7 593.6 0.1X +infer timestamps from Dataset[String] 10598 10664 67 0.9 1059.8 0.1X +date strings 1205 1212 9 8.3 120.5 0.5X +parse dates from Dataset[String] 6858 6911 49 1.5 685.8 0.1X +from_csv(timestamp) 4824 4859 33 2.1 482.4 0.1X +from_csv(date) 6096 
6101 7 1.6 609.6 0.1X +infer error timestamps from Dataset[String] with default format 7161 7167 7 1.4 716.1 0.1X +infer error timestamps from Dataset[String] with user-provided format 7225 7311 136 1.4 722.5 0.1X +infer error timestamps from Dataset[String] with legacy format 7094 7244 131 1.4 709.4 0.1X +read time text from files 587 592 5 17.0 58.7 1.1X +read time from files 4141 4253 109 2.4 414.1 0.2X -OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Mac OS X 26.5 +Apple M3 Pro Filters pushdown: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -w/o filters 4268 4277 9 0.0 42682.0 1.0X -pushdown disabled 4250 4254 5 0.0 42501.3 1.0X -w/ filters 863 869 5 0.1 8634.6 4.9X +w/o filters 4222 4293 64 0.0 42222.8 1.0X +pushdown disabled 4170 4176 9 0.0 41702.7 1.0X +w/ filters 520 526 9 0.2 5198.6 8.1X -OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Mac OS X 26.5 +Apple M3 Pro Interval: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Read as Intervals 748 749 2 0.4 2493.1 1.0X -Read Raw Strings 304 305 1 1.0 1014.7 2.5X +Read as Intervals 423 426 4 0.7 1408.4 1.0X +Read Raw Strings 170 172 2 1.8 565.5 2.5X diff --git a/sql/core/benchmarks/ExtractBenchmark-results.txt b/sql/core/benchmarks/ExtractBenchmark-results.txt index e57086bcc84a0..7e0891075382b 100644 --- a/sql/core/benchmarks/ExtractBenchmark-results.txt +++ b/sql/core/benchmarks/ExtractBenchmark-results.txt @@ -1,104 +1,122 @@ -OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure -Intel(R) Xeon(R) Platinum 
8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Mac OS X 26.5 +Apple M3 Pro Invoke extract for timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast to timestamp 257 270 18 39.0 25.7 1.0X -YEAR of timestamp 684 690 5 14.6 68.4 0.4X -YEAROFWEEK of timestamp 752 776 39 13.3 75.2 0.3X -QUARTER of timestamp 711 726 21 14.1 71.1 0.4X -MONTH of timestamp 699 706 8 14.3 69.9 0.4X -WEEK of timestamp 958 965 8 10.4 95.8 0.3X -DAY of timestamp 696 709 15 14.4 69.6 0.4X -DAYOFWEEK of timestamp 836 840 5 12.0 83.6 0.3X -DOW of timestamp 836 844 12 12.0 83.6 0.3X -DOW_ISO of timestamp 814 815 1 12.3 81.4 0.3X -DAYOFWEEK_ISO of timestamp 812 816 3 12.3 81.2 0.3X -DOY of timestamp 710 712 2 14.1 71.0 0.4X -HOUR of timestamp 577 587 14 17.3 57.7 0.4X -MINUTE of timestamp 582 584 3 17.2 58.2 0.4X -SECOND of timestamp 681 683 2 14.7 68.1 0.4X +cast to timestamp 191 200 8 52.3 19.1 1.0X +YEAR of timestamp 772 785 18 13.0 77.2 0.2X +YEAROFWEEK of timestamp 887 899 18 11.3 88.7 0.2X +QUARTER of timestamp 782 794 15 12.8 78.2 0.2X +MONTH of timestamp 773 804 46 12.9 77.3 0.2X +WEEK of timestamp 1072 1090 26 9.3 107.2 0.2X +DAY of timestamp 781 786 5 12.8 78.1 0.2X +DAYOFWEEK of timestamp 973 979 8 10.3 97.3 0.2X +DOW of timestamp 970 979 8 10.3 97.0 0.2X +DOW_ISO of timestamp 887 894 9 11.3 88.7 0.2X +DAYOFWEEK_ISO of timestamp 876 887 18 11.4 87.6 0.2X +DOY of timestamp 842 854 10 11.9 84.2 0.2X +HOUR of timestamp 613 615 2 16.3 61.3 0.3X +MINUTE of timestamp 628 634 6 15.9 62.8 0.3X +SECOND of timestamp 686 692 10 14.6 68.6 0.3X -OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Mac OS X 26.5 +Apple M3 Pro Invoke date_part for timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -cast to timestamp 233 236 3 42.9 23.3 1.0X -YEAR of timestamp 686 693 8 14.6 68.6 0.3X -YEAROFWEEK of timestamp 741 744 3 13.5 74.1 0.3X -QUARTER of timestamp 708 713 4 14.1 70.8 0.3X -MONTH of timestamp 693 704 14 14.4 69.3 0.3X -WEEK of timestamp 956 960 4 10.5 95.6 0.2X -DAY of timestamp 691 696 5 14.5 69.1 0.3X -DAYOFWEEK of timestamp 830 837 8 12.0 83.0 0.3X -DOW of timestamp 830 831 0 12.0 83.0 0.3X -DOW_ISO of timestamp 803 809 11 12.5 80.3 0.3X -DAYOFWEEK_ISO of timestamp 803 808 8 12.5 80.3 0.3X -DOY of timestamp 707 714 9 14.1 70.7 0.3X -HOUR of timestamp 573 575 2 17.5 57.3 0.4X -MINUTE of timestamp 570 575 5 17.5 57.0 0.4X -SECOND of timestamp 683 686 2 14.6 68.3 0.3X +cast to timestamp 156 157 1 64.1 15.6 1.0X +YEAR of timestamp 770 797 31 13.0 77.0 0.2X +YEAROFWEEK of timestamp 921 943 37 10.9 92.1 0.2X +QUARTER of timestamp 778 788 15 12.9 77.8 0.2X +MONTH of timestamp 752 760 11 13.3 75.2 0.2X +WEEK of timestamp 1133 1159 25 8.8 113.3 0.1X +DAY of timestamp 769 773 7 13.0 76.9 0.2X +DAYOFWEEK of timestamp 1005 1016 12 10.0 100.5 0.2X +DOW of timestamp 1015 1075 54 9.9 101.5 0.2X +DOW_ISO of timestamp 923 932 7 10.8 92.3 0.2X +DAYOFWEEK_ISO of timestamp 921 923 1 10.9 92.1 0.2X +DOY of timestamp 885 906 27 11.3 88.5 0.2X +HOUR of timestamp 620 626 9 16.1 62.0 0.3X +MINUTE of timestamp 632 638 9 15.8 63.2 0.2X +SECOND of timestamp 713 715 2 14.0 71.3 0.2X -OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Mac OS X 26.5 +Apple M3 Pro Invoke extract for date: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast to date 565 569 5 17.7 56.5 1.0X -YEAR of date 690 691 2 14.5 69.0 0.8X 
-YEAROFWEEK of date 734 736 2 13.6 73.4 0.8X -QUARTER of date 702 704 3 14.2 70.2 0.8X -MONTH of date 689 693 4 14.5 68.9 0.8X -WEEK of date 946 954 6 10.6 94.6 0.6X -DAY of date 684 689 6 14.6 68.4 0.8X -DAYOFWEEK of date 828 831 4 12.1 82.8 0.7X -DOW of date 826 827 1 12.1 82.6 0.7X -DOW_ISO of date 796 805 11 12.6 79.6 0.7X -DAYOFWEEK_ISO of date 798 801 3 12.5 79.8 0.7X -DOY of date 710 712 4 14.1 71.0 0.8X -HOUR of date 1177 1186 13 8.5 117.7 0.5X -MINUTE of date 1173 1175 2 8.5 117.3 0.5X -SECOND of date 1265 1272 11 7.9 126.5 0.4X +cast to date 692 692 1 14.5 69.2 1.0X +YEAR of date 771 776 6 13.0 77.1 0.9X +YEAROFWEEK of date 940 946 6 10.6 94.0 0.7X +QUARTER of date 788 810 36 12.7 78.8 0.9X +MONTH of date 765 770 6 13.1 76.5 0.9X +WEEK of date 1186 1194 9 8.4 118.6 0.6X +DAY of date 774 786 11 12.9 77.4 0.9X +DAYOFWEEK of date 1018 1023 4 9.8 101.8 0.7X +DOW of date 1015 1031 25 9.9 101.5 0.7X +DOW_ISO of date 937 962 25 10.7 93.7 0.7X +DAYOFWEEK_ISO of date 924 933 10 10.8 92.4 0.7X +DOY of date 885 913 26 11.3 88.5 0.8X +HOUR of date 2092 2099 7 4.8 209.2 0.3X +MINUTE of date 1904 1927 23 5.3 190.4 0.4X +SECOND of date 2224 2242 22 4.5 222.4 0.3X -OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Mac OS X 26.5 +Apple M3 Pro Invoke date_part for date: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast to date 559 565 8 17.9 55.9 1.0X -YEAR of date 688 691 3 14.5 68.8 0.8X -YEAROFWEEK of date 737 741 5 13.6 73.7 0.8X -QUARTER of date 700 703 5 14.3 70.0 0.8X -MONTH of date 686 690 7 14.6 68.6 0.8X -WEEK of date 947 948 1 10.6 94.7 0.6X -DAY of date 683 685 2 14.6 68.3 0.8X -DAYOFWEEK of date 825 826 1 12.1 82.5 0.7X -DOW of date 822 826 4 12.2 82.2 0.7X -DOW_ISO of date 797 801 4 12.5 79.7 0.7X -DAYOFWEEK_ISO 
of date 797 802 4 12.5 79.7 0.7X -DOY of date 706 706 1 14.2 70.6 0.8X -HOUR of date 1174 1180 6 8.5 117.4 0.5X -MINUTE of date 1169 1171 2 8.6 116.9 0.5X -SECOND of date 1265 1268 3 7.9 126.5 0.4X +cast to date 692 702 10 14.5 69.2 1.0X +YEAR of date 779 782 5 12.8 77.9 0.9X +YEAROFWEEK of date 949 960 13 10.5 94.9 0.7X +QUARTER of date 777 785 7 12.9 77.7 0.9X +MONTH of date 764 768 4 13.1 76.4 0.9X +WEEK of date 1212 1223 12 8.2 121.2 0.6X +DAY of date 783 819 56 12.8 78.3 0.9X +DAYOFWEEK of date 1033 1038 6 9.7 103.3 0.7X +DOW of date 1031 1053 35 9.7 103.1 0.7X +DOW_ISO of date 923 942 22 10.8 92.3 0.7X +DAYOFWEEK_ISO of date 935 939 3 10.7 93.5 0.7X +DOY of date 879 893 12 11.4 87.9 0.8X +HOUR of date 2101 2121 27 4.8 210.1 0.3X +MINUTE of date 1927 1933 7 5.2 192.7 0.4X +SECOND of date 2251 2261 16 4.4 225.1 0.3X -OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Mac OS X 26.5 +Apple M3 Pro +Invoke extract for time: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +cast to time 349 352 3 28.7 34.9 1.0X +HOUR of time 380 404 35 26.3 38.0 0.9X +MINUTE of time 380 381 1 26.3 38.0 0.9X +SECOND of time 2053 2058 4 4.9 205.3 0.2X + +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Mac OS X 26.5 +Apple M3 Pro +Invoke date_part for time: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +cast to time 345 346 1 28.9 34.5 1.0X +HOUR of time 382 382 1 26.2 38.2 0.9X +MINUTE of time 381 383 2 26.2 38.1 0.9X +SECOND of time 2062 2066 7 4.8 206.2 0.2X + +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Mac OS X 26.5 +Apple M3 Pro Invoke extract for interval: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) 
Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast to interval 859 862 4 11.6 85.9 1.0X -YEAR of interval 828 828 1 12.1 82.8 1.0X -MONTH of interval 842 854 20 11.9 84.2 1.0X -DAY of interval 827 832 7 12.1 82.7 1.0X -HOUR of interval 848 850 2 11.8 84.8 1.0X -MINUTE of interval 852 860 14 11.7 85.2 1.0X -SECOND of interval 941 943 2 10.6 94.1 0.9X +cast to interval 916 923 9 10.9 91.6 1.0X +YEAR of interval 937 947 9 10.7 93.7 1.0X +MONTH of interval 935 941 8 10.7 93.5 1.0X +DAY of interval 893 912 23 11.2 89.3 1.0X +HOUR of interval 941 954 17 10.6 94.1 1.0X +MINUTE of interval 980 988 8 10.2 98.0 0.9X +SECOND of interval 868 871 5 11.5 86.8 1.1X -OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Mac OS X 26.5 +Apple M3 Pro Invoke date_part for interval: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast to interval 851 852 0 11.7 85.1 1.0X -YEAR of interval 823 825 3 12.1 82.3 1.0X -MONTH of interval 833 838 4 12.0 83.3 1.0X -DAY of interval 835 836 2 12.0 83.5 1.0X -HOUR of interval 846 851 6 11.8 84.6 1.0X -MINUTE of interval 857 859 2 11.7 85.7 1.0X -SECOND of interval 942 949 10 10.6 94.2 0.9X +cast to interval 909 920 18 11.0 90.9 1.0X +YEAR of interval 939 949 10 10.7 93.9 1.0X +MONTH of interval 921 932 11 10.9 92.1 1.0X +DAY of interval 924 931 10 10.8 92.4 1.0X +HOUR of interval 942 945 3 10.6 94.2 1.0X +MINUTE of interval 998 1007 9 10.0 99.8 0.9X +SECOND of interval 880 889 15 11.4 88.0 1.0X diff --git a/sql/core/benchmarks/JsonBenchmark-results.txt b/sql/core/benchmarks/JsonBenchmark-results.txt index f6b52b4def0cb..ebfb547426a51 100644 --- a/sql/core/benchmarks/JsonBenchmark-results.txt +++ 
b/sql/core/benchmarks/JsonBenchmark-results.txt @@ -3,128 +3,133 @@ Benchmark for performance of JSON parsing ================================================================================================ Preparing data for benchmarking ... -OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure -AMD EPYC 9V74 80-Core Processor +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Mac OS X 26.5 +Apple M3 Pro JSON schema inferring: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -No encoding 1899 1983 91 2.6 379.7 1.0X -UTF-8 is set 5121 5134 13 1.0 1024.2 0.4X +No encoding 1069 1158 154 4.7 213.8 1.0X +UTF-8 is set 2298 2361 101 2.2 459.5 0.5X Preparing data for benchmarking ... -OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure -AMD EPYC 9V74 80-Core Processor +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Mac OS X 26.5 +Apple M3 Pro count a short column: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -No encoding 1927 1963 55 2.6 385.5 1.0X -UTF-8 is set 4451 4462 10 1.1 890.2 0.4X +No encoding 978 1033 86 5.1 195.6 1.0X +UTF-8 is set 2095 2180 101 2.4 419.0 0.5X Preparing data for benchmarking ... 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure -AMD EPYC 9V74 80-Core Processor +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Mac OS X 26.5 +Apple M3 Pro count a wide column: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -No encoding 5230 5246 16 0.2 5230.0 1.0X -UTF-8 is set 4768 4856 88 0.2 4767.9 1.1X +No encoding 2349 2441 84 0.4 2348.5 1.0X +UTF-8 is set 3257 3305 58 0.3 3256.8 0.7X Preparing data for benchmarking ... -OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure -AMD EPYC 9V74 80-Core Processor +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Mac OS X 26.5 +Apple M3 Pro select wide row: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -No encoding 8996 9163 215 0.0 179920.8 1.0X -UTF-8 is set 9757 9790 31 0.0 195143.4 0.9X +No encoding 5388 5520 177 0.0 107761.9 1.0X +UTF-8 is set 5643 5692 43 0.0 112869.1 1.0X Preparing data for benchmarking ... -OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure -AMD EPYC 9V74 80-Core Processor +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Mac OS X 26.5 +Apple M3 Pro Select a subset of 10 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Select 10 columns 1557 1560 3 0.6 1557.2 1.0X -Select 1 column 1184 1196 20 0.8 1183.7 1.3X +Select 10 columns 755 762 11 1.3 754.9 1.0X +Select 1 column 516 525 8 1.9 516.4 1.5X Preparing data for benchmarking ... 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure -AMD EPYC 9V74 80-Core Processor +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Mac OS X 26.5 +Apple M3 Pro creation of JSON parser per line: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Short column without encoding 561 563 3 1.8 561.5 1.0X -Short column with UTF-8 1140 1146 8 0.9 1139.9 0.5X -Wide column without encoding 5163 5179 23 0.2 5163.2 0.1X -Wide column with UTF-8 9810 9833 22 0.1 9810.2 0.1X +Short column without encoding 277 283 8 3.6 276.9 1.0X +Short column with UTF-8 485 486 1 2.1 484.5 0.6X +Wide column without encoding 4075 4126 77 0.2 4075.3 0.1X +Wide column with UTF-8 4631 4678 52 0.2 4630.9 0.1X Preparing data for benchmarking ... -OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure -AMD EPYC 9V74 80-Core Processor +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Mac OS X 26.5 +Apple M3 Pro JSON functions: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Text read 62 66 4 16.2 61.7 1.0X -from_json 969 979 9 1.0 968.7 0.1X -json_tuple 905 908 4 1.1 905.1 0.1X -get_json_object wholestage off 938 942 4 1.1 938.3 0.1X -get_json_object wholestage on 835 847 18 1.2 835.0 0.1X +Text read 41 42 1 24.3 41.2 1.0X +from_json 591 600 9 1.7 590.8 0.1X +json_tuple 577 587 8 1.7 577.2 0.1X +get_json_object wholestage off 555 574 24 1.8 554.5 0.1X +get_json_object wholestage on 534 546 10 1.9 533.6 0.1X Preparing data for benchmarking ... 
-OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure -AMD EPYC 9V74 80-Core Processor +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Mac OS X 26.5 +Apple M3 Pro Dataset of json strings: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Text read 254 256 3 19.7 50.7 1.0X -schema inferring 1504 1524 26 3.3 300.8 0.2X -parsing 2391 2421 27 2.1 478.2 0.1X +Text read 172 173 1 29.0 34.5 1.0X +schema inferring 746 748 1 6.7 149.2 0.2X +parsing 1153 1165 13 4.3 230.7 0.1X Preparing data for benchmarking ... -OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure -AMD EPYC 9V74 80-Core Processor +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Mac OS X 26.5 +Apple M3 Pro Json files in the per-line mode: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Text read 624 652 30 8.0 124.8 1.0X -Schema inferring 2032 2036 5 2.5 406.5 0.3X -Parsing without charset 2527 2529 3 2.0 505.4 0.2X -Parsing with UTF-8 5453 5470 27 0.9 1090.6 0.1X +Text read 303 307 4 16.5 60.5 1.0X +Schema inferring 998 1012 18 5.0 199.6 0.3X +Parsing without charset 1119 1140 21 4.5 223.7 0.3X +Parsing with UTF-8 2281 2318 62 2.2 456.1 0.1X -OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure -AMD EPYC 9V74 80-Core Processor +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Mac OS X 26.5 +Apple M3 Pro Write dates and timestamps: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Create a dataset of timestamps 107 110 4 9.4 106.5 1.0X -to_json(timestamp) 576 582 9 1.7 576.3 0.2X -write timestamps to files 623 626 4 1.6 623.2 0.2X -Create a dataset of dates 120 123 3 8.3 
120.1 0.9X -to_json(date) 391 398 7 2.6 391.0 0.3X -write dates to files 415 418 5 2.4 415.3 0.3X +Create a dataset of timestamps 55 58 3 18.3 54.7 1.0X +to_json(timestamp) 472 479 6 2.1 472.2 0.1X +write timestamps to files 617 625 8 1.6 617.4 0.1X +Create a dataset of dates 85 87 2 11.8 84.9 0.6X +to_json(date) 243 250 8 4.1 242.7 0.2X +write dates to files 464 481 18 2.2 464.4 0.1X +Create a dataset of times 80 84 4 12.5 79.9 0.7X +to_json(time) 279 288 13 3.6 278.9 0.2X +write times to files 481 488 10 2.1 480.8 0.1X -OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure -AMD EPYC 9V74 80-Core Processor +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Mac OS X 26.5 +Apple M3 Pro Read dates and timestamps: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------------------------- -read timestamp text from files 163 165 4 6.1 162.8 1.0X -read timestamps from files 987 997 12 1.0 987.2 0.2X -infer timestamps from files 1835 1847 20 0.5 1835.4 0.1X -read date text from files 150 153 3 6.7 149.8 1.1X -read date from files 621 623 2 1.6 621.4 0.3X -timestamp strings 145 146 1 6.9 145.2 1.1X -parse timestamps from Dataset[String] 1171 1185 21 0.9 1170.6 0.1X -infer timestamps from Dataset[String] 2012 2015 5 0.5 2011.7 0.1X -date strings 215 217 2 4.6 215.2 0.8X -parse dates from Dataset[String] 927 931 5 1.1 927.4 0.2X -from_json(timestamp) 1635 1639 6 0.6 1635.3 0.1X -from_json(date) 1405 1412 6 0.7 1405.0 0.1X -infer error timestamps from Dataset[String] with default format 1273 1276 5 0.8 1273.4 0.1X -infer error timestamps from Dataset[String] with user-provided format 1257 1260 3 0.8 1256.7 0.1X -infer error timestamps from Dataset[String] with legacy format 1258 1260 3 0.8 1258.0 0.1X +read timestamp text from files 90 91 1 11.1 90.1 1.0X +read timestamps from files 583 591 7 1.7 583.3 0.2X +infer timestamps from 
files 6711 6806 110 0.1 6711.1 0.0X +read date text from files 81 83 3 12.4 80.9 1.1X +read date from files 287 287 1 3.5 286.9 0.3X +timestamp strings 82 83 1 12.1 82.4 1.1X +parse timestamps from Dataset[String] 503 504 2 2.0 502.6 0.2X +infer timestamps from Dataset[String] 6086 6107 23 0.2 6086.3 0.0X +date strings 135 145 11 7.4 135.5 0.7X +parse dates from Dataset[String] 410 417 9 2.4 410.4 0.2X +from_json(timestamp) 880 895 13 1.1 880.1 0.1X +from_json(date) 756 760 4 1.3 756.0 0.1X +infer error timestamps from Dataset[String] with default format 5478 5559 81 0.2 5477.5 0.0X +infer error timestamps from Dataset[String] with user-provided format 5470 5569 134 0.2 5470.4 0.0X +infer error timestamps from Dataset[String] with legacy format 5585 5590 8 0.2 5585.4 0.0X +read time text from files 81 82 1 12.4 80.8 1.1X +read time from files 358 419 96 2.8 358.3 0.3X -OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure -AMD EPYC 9V74 80-Core Processor +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Mac OS X 26.5 +Apple M3 Pro Filters pushdown: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -w/o filters 4817 4833 15 0.0 48167.4 1.0X -pushdown disabled 4772 4776 5 0.0 47721.9 1.0X -w/ filters 695 710 16 0.1 6949.2 6.9X +w/o filters 4669 4841 296 0.0 46694.5 1.0X +pushdown disabled 4625 4725 117 0.0 46247.7 1.0X +w/ filters 411 430 17 0.2 4106.4 11.4X -OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure -AMD EPYC 9V74 80-Core Processor +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Mac OS X 26.5 +Apple M3 Pro Partial JSON results: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -parse invalid JSON 2386 2451 111 0.0 238564.1 1.0X +parse invalid JSON 1257 1292 37 0.0 
125652.2 1.0X diff --git a/sql/core/benchmarks/TimeBenchmark-results.txt b/sql/core/benchmarks/TimeBenchmark-results.txt new file mode 100644 index 0000000000000..82e63a9ec93c1 --- /dev/null +++ b/sql/core/benchmarks/TimeBenchmark-results.txt @@ -0,0 +1,126 @@ +================================================================================================ +Current time +================================================================================================ + +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Mac OS X 26.5 +Apple M3 Pro +current_time: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +current_time wholestage off 144 145 2 69.4 14.4 1.0X +current_time wholestage on 147 187 45 67.9 14.7 1.0X + + +================================================================================================ +make_time +================================================================================================ + +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Mac OS X 26.5 +Apple M3 Pro +make_time: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +make_time wholestage off 322 323 1 31.0 32.2 1.0X +make_time wholestage on 308 310 2 32.5 30.8 1.0X + + +================================================================================================ +Parsing time +================================================================================================ + +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Mac OS X 26.5 +Apple M3 Pro +to_time: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +to_time wholestage off 5902 5970 97 1.7 590.2 1.0X +to_time 
wholestage on 5824 5959 99 1.7 582.4 1.0X + + +================================================================================================ +Extract components from TIME +================================================================================================ + +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Mac OS X 26.5 +Apple M3 Pro +hour of time: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +hour of time wholestage off 365 365 1 27.4 36.5 1.0X +hour of time wholestage on 354 356 2 28.2 35.4 1.0X + +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Mac OS X 26.5 +Apple M3 Pro +minute of time: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +minute of time wholestage off 349 351 2 28.6 34.9 1.0X +minute of time wholestage on 353 356 2 28.4 35.3 1.0X + +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Mac OS X 26.5 +Apple M3 Pro +second of time: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +second of time wholestage off 347 348 1 28.8 34.7 1.0X +second of time wholestage on 350 352 2 28.6 35.0 1.0X + + +================================================================================================ +time_trunc +================================================================================================ + +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Mac OS X 26.5 +Apple M3 Pro +time_trunc HOUR: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +time_trunc HOUR wholestage off 709 714 7 14.1 70.9 1.0X 
+time_trunc HOUR wholestage on 687 706 29 14.6 68.7 1.0X + +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Mac OS X 26.5 +Apple M3 Pro +time_trunc MINUTE: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +time_trunc MINUTE wholestage off 689 693 6 14.5 68.9 1.0X +time_trunc MINUTE wholestage on 692 696 5 14.5 69.2 1.0X + +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Mac OS X 26.5 +Apple M3 Pro +time_trunc SECOND: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +time_trunc SECOND wholestage off 685 686 2 14.6 68.5 1.0X +time_trunc SECOND wholestage on 719 728 5 13.9 71.9 1.0X + + +================================================================================================ +time_diff +================================================================================================ + +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Mac OS X 26.5 +Apple M3 Pro +time_diff: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +time_diff wholestage off 999 1011 17 10.0 99.9 1.0X +time_diff wholestage on 708 712 7 14.1 70.8 1.4X + + +================================================================================================ +TIME +/- interval +================================================================================================ + +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Mac OS X 26.5 +Apple M3 Pro +TIME +/- interval: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +time + interval hour 330 331 1 30.3 33.0 1.0X 
+time + interval minute 327 331 3 30.6 32.7 1.0X +time + interval second 328 334 4 30.5 32.8 1.0X +time - interval hour 335 339 5 29.8 33.5 1.0X + + +================================================================================================ +Conversion from/to external types +================================================================================================ + +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Mac OS X 26.5 +Apple M3 Pro +To/from java.time.LocalTime: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +From java.time.LocalTime 133 135 2 37.6 26.6 1.0X +Collect java.time.LocalTime 606 634 27 8.2 121.2 0.2X + + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/ExtractBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/ExtractBenchmark.scala index 2fffa265cb735..b1a64973faafa 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/ExtractBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/ExtractBenchmark.scala @@ -64,6 +64,8 @@ object ExtractBenchmark extends SqlBasedBenchmark { private def castExpr(from: String): String = from match { case "timestamp" => "timestamp_seconds(id)" case "date" => "cast(timestamp_seconds(id) as date)" + case "time" => "make_time(cast(mod(id, 24) as int), cast(mod(id, 60) as int), " + + "cast(mod(id, 60) as decimal(8,6)))" case "interval" => "(cast(timestamp_seconds(id) as date) - date'0001-01-01') + " + "(timestamp_seconds(id) - timestamp'1000-01-01 01:02:03.123456')" case other => throw new IllegalArgumentException( @@ -92,9 +94,11 @@ object ExtractBenchmark extends SqlBasedBenchmark { val datetimeFields = Seq("YEAR", "YEAROFWEEK", "QUARTER", "MONTH", "WEEK", "DAY", "DAYOFWEEK", "DOW", "DOW_ISO", "DAYOFWEEK_ISO", "DOY", "HOUR", "MINUTE", "SECOND") val 
intervalFields = Seq("YEAR", "MONTH", "DAY", "HOUR", "MINUTE", "SECOND") + val timeFields = Seq("HOUR", "MINUTE", "SECOND") val settings = Map( "timestamp" -> datetimeFields, "date" -> datetimeFields, + "time" -> timeFields, "interval" -> intervalFields) for {(dataType, fields) <- settings; func <- Seq("extract", "date_part")} { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TimeBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TimeBenchmark.scala new file mode 100644 index 0000000000000..86268ef5488ba --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TimeBenchmark.scala @@ -0,0 +1,132 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.benchmark + +import java.time.LocalTime + +import org.apache.spark.benchmark.Benchmark + +/** + * Synthetic benchmark for TIME data type functions. + * To run this benchmark: + * {{{ + * 1. without sbt: + * bin/spark-submit --class <this class> + * --jars <spark core test jar>,<spark catalyst test jar> <spark sql test jar> + * 2. build/sbt "sql/Test/runMain <this class>" + * 3. generate result: + * SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/Test/runMain <this class>" + * Results will be written to "benchmarks/TimeBenchmark-results.txt".
+ * }}} + */ +object TimeBenchmark extends SqlBasedBenchmark { + private def doBenchmark(cardinality: Int, exprs: String*): Unit = { + spark.range(cardinality) + .selectExpr(exprs: _*) + .noop() + } + + private def run(cardinality: Int, name: String, exprs: String*): Unit = { + codegenBenchmark(name, cardinality) { + doBenchmark(cardinality, exprs: _*) + } + } + + override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { + val N = 10000000 + // Generate TIME values using make_time(hour, minute, decimal_seconds) + val timeExpr = "make_time(cast(mod(id, 24) as int), cast(mod(id, 60) as int), " + + "cast(mod(id, 60) as decimal(8,6)))" + + runBenchmark("Current time") { + run(N, "current_time", "current_time()") + } + + runBenchmark("make_time") { + run(N, "make_time", timeExpr) + } + + runBenchmark("Parsing time") { + val timeStrExpr = "concat(lpad(cast(mod(id, 24) as string), 2, '0'), ':', " + + "lpad(cast(mod(id, 60) as string), 2, '0'), ':', " + + "lpad(cast(mod(id, 60) as string), 2, '0'))" + run(N, "to_time", s"to_time($timeStrExpr, 'HH:mm:ss')") + } + + runBenchmark("Extract components from TIME") { + run(N, "hour of time", s"hour($timeExpr)") + run(N, "minute of time", s"minute($timeExpr)") + run(N, "second of time", s"second($timeExpr)") + } + + runBenchmark("time_trunc") { + Seq("HOUR", "MINUTE", "SECOND").foreach { level => + run(N, s"time_trunc $level", s"time_trunc('$level', $timeExpr)") + } + } + + runBenchmark("time_diff") { + val timeExpr2 = "make_time(cast(mod(id + 1, 24) as int), cast(mod(id + 2, 60) as int), " + + "cast(mod(id + 3, 60) as decimal(8,6)))" + run(N, "time_diff", s"time_diff('SECOND', $timeExpr, $timeExpr2)") + } + + runBenchmark("TIME +/- interval") { + // Use make_time with hour < 20 (mod(id, 20)) to avoid overflow when adding intervals + val safeTimeExpr = "make_time(cast(mod(id, 20) as int), cast(mod(id, 60) as int), " + + "cast(mod(id, 60) as decimal(8,6)))" + val benchmark = new Benchmark("TIME +/- interval", N, output = output)
+ benchmark.addCase("time + interval hour") { _ => + doBenchmark(N, s"$safeTimeExpr + interval 1 hour") + } + benchmark.addCase("time + interval minute") { _ => + doBenchmark(N, s"$safeTimeExpr + interval 30 minute") + } + benchmark.addCase("time + interval second") { _ => + doBenchmark(N, s"$safeTimeExpr + interval 45 second") + } + benchmark.addCase("time - interval hour") { _ => + // Use hours >= 2 (mod(id, 20) + 2) to avoid underflow + val subTimeExpr = "make_time(cast(mod(id, 20) + 2 as int), cast(mod(id, 60) as int), " + + "cast(mod(id, 60) as decimal(8,6)))" + doBenchmark(N, s"$subTimeExpr - interval 1 hour") + } + benchmark.run() + } + + runBenchmark("Conversion from/to external types") { + import spark.implicits._ + val rowsNum = 5000000 + val numIters = 3 + val benchmark = new Benchmark("To/from java.time.LocalTime", rowsNum, output = output) + benchmark.addCase("From java.time.LocalTime", numIters) { _ => + spark.range(rowsNum) + .map(nanos => LocalTime.ofNanoOfDay(nanos % 86400000000000L)) + .noop() + } + def localTimes = { + spark.range(0, rowsNum, 1, 1) + .map(nanos => LocalTime.ofNanoOfDay(nanos % 86400000000000L)) + } + benchmark.addCase("Collect java.time.LocalTime", numIters) { _ => + localTimes.collect() + } + benchmark.run() + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVBenchmark.scala index 524c222062150..45178a4a3db5b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVBenchmark.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.execution.datasources.csv import java.io.File -import java.time.{Instant, LocalDate} +import java.time.{Instant, LocalDate, LocalTime} import org.apache.spark.benchmark.Benchmark import org.apache.spark.sql.{Column, Dataset, Row} @@ -190,6 +190,26 @@ object
CSVBenchmark extends SqlBasedBenchmark { dates.write.option("header", true).mode("overwrite").csv(dateDir) } + val timeDir = new File(path, "time").getAbsolutePath + + def times = { + spark.range(0, rowsNum, 1, 1).mapPartitions { iter => + iter.map(t => LocalTime.ofNanoOfDay((t % 86400000L) * 1000000L)) // ms-of-day -> ns + }.select($"value".as("time")) + } + + writeBench.addCase("Create a dataset of times", numIters) { _ => + times.noop() + } + + writeBench.addCase("to_csv(time)", numIters) { _ => + times.select(to_csv(struct($"time"))).noop() + } + + writeBench.addCase("write times to files", numIters) { _ => + times.write.option("header", true).mode("overwrite").csv(timeDir) + } + writeBench.run() val readBench = new Benchmark("Read dates and timestamps", rowsNum, output = output) @@ -323,6 +343,20 @@ object CSVBenchmark extends SqlBasedBenchmark { } } + val timeSchema = new StructType().add("time", TimeType()) + + readBench.addCase("read time text from files", numIters) { _ => + spark.read.text(timeDir).noop() + } + + readBench.addCase("read time from files", numIters) { _ => + val ds = spark.read + .option("header", true) + .schema(timeSchema) + .csv(timeDir) + ds.noop() + } + readBench.run() } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonBenchmark.scala index 94a2ccc41d30b..937b1cee23d0e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonBenchmark.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.execution.datasources.json import java.io.File -import java.time.{Instant, LocalDate} +import java.time.{Instant, LocalDate, LocalTime} import org.apache.spark.benchmark.Benchmark import org.apache.spark.sql.{Column, Dataset, Row} @@ -407,6 +407,26 @@ object JsonBenchmark extends SqlBasedBenchmark {
dates.write.option("header", true).mode("overwrite").json(dateDir) } + val timeDir = new File(path, "time").getAbsolutePath + + def times = { + spark.range(0, rowsNum, 1, 1).mapPartitions { iter => + iter.map(t => LocalTime.ofNanoOfDay((t % 86400000L) * 1000000L)) // ms-of-day -> ns + }.select($"value".as("time")) + } + + writeBench.addCase("Create a dataset of times", numIters) { _ => + times.noop() + } + + writeBench.addCase("to_json(time)", numIters) { _ => + times.select(to_json(struct($"time"))).noop() + } + + writeBench.addCase("write times to files", numIters) { _ => + times.write.option("header", true).mode("overwrite").json(timeDir) + } + writeBench.run() val readBench = new Benchmark("Read dates and timestamps", rowsNum, output = output) @@ -508,6 +528,16 @@ object JsonBenchmark extends SqlBasedBenchmark { } } + val timeSchema = new StructType().add("time", TimeType()) + + readBench.addCase("read time text from files", numIters) { _ => + spark.read.text(timeDir).noop() + } + + readBench.addCase("read time from files", numIters) { _ => + spark.read.schema(timeSchema).json(timeDir).noop() + } + readBench.run() } }