Skip to content

Commit c5f3281

Browse files
ext/bcmath: Improving bcpow() performance (php#18099)
1 parent 26dbcb7 commit c5f3281

File tree

5 files changed

+258
-160
lines changed

5 files changed

+258
-160
lines changed

ext/bcmath/bcmath.c

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,26 @@ static zend_result php_str2num(bc_num *num, const zend_string *str)
179179
}
180180
/* }}} */
181181

182+
static void bc_pow_err(bc_raise_status status, uint32_t arg_num)
183+
{
184+
/* If arg_num is 0, it means it is an op */
185+
switch (status) {
186+
case BC_RAISE_STATUS_DIVIDE_BY_ZERO:
187+
zend_throw_exception_ex(zend_ce_division_by_zero_error, 0, "Negative power of zero");
188+
break;
189+
case BC_RAISE_STATUS_LEN_IS_OVERFLOW:
190+
case BC_RAISE_STATUS_SCALE_IS_OVERFLOW:
191+
case BC_RAISE_STATUS_FULLLEN_IS_OVERFLOW:
192+
if (arg_num == 0) {
193+
zend_value_error("exponent is too large, the number of digits overflowed");
194+
} else {
195+
zend_argument_value_error(arg_num, "exponent is too large, the number of digits overflowed");
196+
}
197+
break;
198+
EMPTY_SWITCH_DEFAULT_CASE();
199+
}
200+
}
201+
182202
/* {{{ Returns the sum of two arbitrary precision numbers */
183203
PHP_FUNCTION(bcadd)
184204
{
@@ -615,11 +635,11 @@ PHP_FUNCTION(bcpow)
615635
goto cleanup;
616636
}
617637

618-
if (!bc_raise(first, exponent, &result, scale)) {
619-
zend_throw_exception_ex(zend_ce_division_by_zero_error, 0, "Negative power of zero");
638+
bc_raise_status ret_status = bc_raise(first, exponent, &result, scale);
639+
if (UNEXPECTED(ret_status != BC_RAISE_STATUS_OK)) {
640+
bc_pow_err(ret_status, 2);
620641
goto cleanup;
621642
}
622-
623643
RETVAL_NEW_STR(bc_num2str_ex(result, scale));
624644

625645
cleanup: {
@@ -1144,8 +1164,9 @@ static zend_result bcmath_number_pow_internal(
11441164
}
11451165
return FAILURE;
11461166
}
1147-
if (!bc_raise(n1, exponent, ret, *scale)) {
1148-
zend_throw_exception_ex(zend_ce_division_by_zero_error, 0, "Negative power of zero");
1167+
bc_raise_status ret_status = bc_raise(n1, exponent, ret, *scale);
1168+
if (UNEXPECTED(ret_status != BC_RAISE_STATUS_OK)) {
1169+
bc_pow_err(ret_status, is_op ? 0 : 1);
11491170
return FAILURE;
11501171
}
11511172
bc_rm_trailing_zeros(*ret);

ext/bcmath/libbcmath/src/bcmath.h

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -147,8 +147,6 @@ bc_num bc_multiply(bc_num n1, bc_num n2, size_t scale);
147147
*(result) = mul_ex; \
148148
} while (0)
149149

150-
bc_num bc_square(bc_num n1, size_t scale);
151-
152150
bool bc_divide(bc_num n1, bc_num n2, bc_num *quot, size_t scale);
153151

154152
bool bc_modulo(bc_num num1, bc_num num2, bc_num *resul, size_t scale);
@@ -159,6 +157,14 @@ bc_num bc_floor_or_ceil(bc_num num, bool is_floor);
159157

160158
size_t bc_round(bc_num num, zend_long places, zend_long mode, bc_num *result);
161159

160+
typedef enum {
161+
BC_RAISE_STATUS_OK,
162+
BC_RAISE_STATUS_LEN_IS_OVERFLOW,
163+
BC_RAISE_STATUS_SCALE_IS_OVERFLOW,
164+
BC_RAISE_STATUS_FULLLEN_IS_OVERFLOW,
165+
BC_RAISE_STATUS_DIVIDE_BY_ZERO,
166+
} bc_raise_status;
167+
162168
typedef enum {
163169
OK,
164170
BASE_HAS_FRACTIONAL,
@@ -170,7 +176,7 @@ typedef enum {
170176

171177
raise_mod_status bc_raisemod(bc_num base, bc_num exponent, bc_num mod, bc_num *result, size_t scale);
172178

173-
bool bc_raise(bc_num base, long exponent, bc_num *result, size_t scale);
179+
bc_raise_status bc_raise(bc_num base, long exponent, bc_num *result, size_t scale);
174180

175181
void bc_raise_bc_exponent(bc_num base, bc_num exponent, bc_num *resul, size_t scale);
176182

ext/bcmath/libbcmath/src/private.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,9 @@ static const BC_VECTOR BC_POW_10_LUT[9] = {
8484
bcmath_compare_result _bc_do_compare (bc_num n1, bc_num n2, size_t scale, bool use_sign);
8585
bc_num _bc_do_add (bc_num n1, bc_num n2);
8686
bc_num _bc_do_sub (bc_num n1, bc_num n2);
87+
void bc_multiply_vector(
88+
const BC_VECTOR *n1_vector, size_t n1_arr_size, const BC_VECTOR *n2_vector, size_t n2_arr_size,
89+
BC_VECTOR *prod_vector, size_t prod_arr_size);
8790
void _bc_rm_leading_zeros (bc_num num);
8891

8992
#endif

ext/bcmath/libbcmath/src/raise.c

Lines changed: 181 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -30,31 +30,155 @@
3030
*************************************************************************/
3131

3232
#include "bcmath.h"
33+
#include "convert.h"
34+
#include "private.h"
3335
#include <assert.h>
3436
#include <stdbool.h>
3537
#include <stddef.h>
3638

37-
void bc_square_ex(bc_num n1, bc_num *result, size_t scale_min) {
38-
bc_num square_ex = bc_square(n1, scale_min);
39-
bc_free_num(result);
40-
*(result) = square_ex;
39+
static inline size_t bc_multiply_vector_ex(
40+
BC_VECTOR **n1_vector, size_t n1_arr_size, BC_VECTOR *n2_vector, size_t n2_arr_size, BC_VECTOR **result_vector)
41+
{
42+
size_t result_arr_size = n1_arr_size + n2_arr_size;
43+
bc_multiply_vector(*n1_vector, n1_arr_size, n2_vector, n2_arr_size, *result_vector, result_arr_size);
44+
45+
/* Eliminate extra zeros because they increase the number of calculations. */
46+
while ((*result_vector)[result_arr_size - 1] == 0) {
47+
result_arr_size--;
48+
}
49+
50+
/* Swap n1_vector and result_vector. */
51+
BC_VECTOR *tmp = *n1_vector;
52+
*n1_vector = *result_vector;
53+
*result_vector = tmp;
54+
55+
return result_arr_size;
56+
}
57+
58+
static inline size_t bc_square_vector_ex(BC_VECTOR **base_vector, size_t base_arr_size, BC_VECTOR **result_vector)
59+
{
60+
return bc_multiply_vector_ex(base_vector, base_arr_size, *base_vector, base_arr_size, result_vector);
61+
}
62+
63+
/* Use "exponentiation by squaring". This is the fast path when the results are small. */
64+
static inline bc_num bc_fast_raise(
65+
const char *base_end, long exponent, size_t base_len, size_t power_len, size_t power_scale, size_t power_full_len)
66+
{
67+
BC_VECTOR base_vector = 0;
68+
69+
/* Convert to BC_VECTOR[] */
70+
bc_convert_to_vector(&base_vector, base_end, base_len);
71+
72+
while ((exponent & 1) == 0) {
73+
base_vector *= base_vector;
74+
exponent >>= 1;
75+
}
76+
77+
/* copy base to power */
78+
BC_VECTOR power_vector = base_vector;
79+
exponent >>= 1;
80+
81+
while (exponent > 0) {
82+
base_vector *= base_vector;
83+
if ((exponent & 1) == 1) {
84+
power_vector *= base_vector;
85+
}
86+
exponent >>= 1;
87+
}
88+
89+
bc_num power = bc_new_num_nonzeroed(power_len, power_scale);
90+
char *pptr = power->n_value;
91+
char *pend = pptr + power_full_len - 1;
92+
93+
while (pend >= pptr) {
94+
*pend-- = power_vector % BASE;
95+
power_vector /= BASE;
96+
}
97+
return power;
98+
}
99+
100+
/* Use "exponentiation by squaring". This is the standard path. */
101+
static bc_num bc_standard_raise(
102+
const char *base_ptr, const char *base_end, long exponent, size_t base_len, size_t power_scale)
103+
{
104+
/* Remove the leading zeros as they will be filled in later. */
105+
while (*base_ptr == 0) {
106+
base_ptr++;
107+
base_len--;
108+
}
109+
110+
size_t base_arr_size = BC_ARR_SIZE_FROM_LEN(base_len);
111+
/* Since it is guaranteed that base_len * exponent does not overflow, there is no possibility of overflow here. */
112+
size_t max_power_arr_size = base_arr_size * exponent;
113+
114+
/* The allocated memory area is reused on a rotational basis, so the same size is required. */
115+
BC_VECTOR *buf = safe_emalloc(max_power_arr_size, sizeof(BC_VECTOR) * 3, 0);
116+
BC_VECTOR *base_vector = buf;
117+
BC_VECTOR *power_vector = base_vector + max_power_arr_size;
118+
BC_VECTOR *tmp_result_vector = power_vector + max_power_arr_size;
119+
120+
/* Convert to BC_VECTOR[] */
121+
bc_convert_to_vector(base_vector, base_end, base_len);
122+
123+
while ((exponent & 1) == 0) {
124+
base_arr_size = bc_square_vector_ex(&base_vector, base_arr_size, &tmp_result_vector);
125+
exponent >>= 1;
126+
}
127+
128+
/* copy base to power */
129+
size_t power_arr_size = base_arr_size;
130+
for (size_t i = 0; i < base_arr_size; i++) {
131+
power_vector[i] = base_vector[i];
132+
}
133+
exponent >>= 1;
134+
135+
while (exponent > 0) {
136+
base_arr_size = bc_square_vector_ex(&base_vector, base_arr_size, &tmp_result_vector);
137+
if ((exponent & 1) == 1) {
138+
power_arr_size = bc_multiply_vector_ex(&power_vector, power_arr_size, base_vector, base_arr_size, &tmp_result_vector);
139+
}
140+
exponent >>= 1;
141+
}
142+
143+
/* Convert to bc_num */
144+
size_t power_leading_zeros = 0;
145+
size_t power_len;
146+
size_t power_full_len = power_arr_size * BC_VECTOR_SIZE;
147+
if (power_full_len > power_scale) {
148+
power_len = power_full_len - power_scale;
149+
} else {
150+
power_len = 1;
151+
power_leading_zeros = power_scale - power_full_len + 1;
152+
power_full_len = power_scale + 1;
153+
}
154+
bc_num power = bc_new_num_nonzeroed(power_len, power_scale);
155+
156+
char *pptr = power->n_value;
157+
char *pend = pptr + power_full_len - 1;
158+
159+
/* Pad with leading zeros if necessary. */
160+
memset(pptr, 0, power_leading_zeros);
161+
pptr += power_leading_zeros;
162+
163+
bc_convert_vector_to_char(power_vector, pptr, pend, power_arr_size);
164+
165+
efree(buf);
166+
167+
return power;
41168
}
42169

43170
/* Raise "base" to the "exponent" power. The result is placed in RESULT.
44171
Maximum exponent is LONG_MAX. If a "exponent" is not an integer,
45172
only the integer part is used. */
46-
bool bc_raise(bc_num base, long exponent, bc_num *result, size_t scale) {
47-
bc_num temp, power;
173+
bc_raise_status bc_raise(bc_num base, long exponent, bc_num *result, size_t scale) {
48174
size_t rscale;
49-
size_t pwrscale;
50-
size_t calcscale;
51175
bool is_neg;
52176

53177
/* Special case if exponent is a zero. */
54178
if (exponent == 0) {
55179
bc_free_num (result);
56180
*result = bc_copy_num(BCG(_one_));
57-
return true;
181+
return BC_RAISE_STATUS_OK;
58182
}
59183

60184
/* Other initializations. */
@@ -67,44 +191,66 @@ bool bc_raise(bc_num base, long exponent, bc_num *result, size_t scale) {
67191
rscale = MIN (base->n_scale * exponent, MAX(scale, base->n_scale));
68192
}
69193

70-
/* Set initial value of temp. */
71-
power = bc_copy_num(base);
72-
pwrscale = base->n_scale;
73-
while ((exponent & 1) == 0) {
74-
pwrscale = 2 * pwrscale;
75-
bc_square_ex(power, &power, pwrscale);
76-
exponent = exponent >> 1;
194+
if (bc_is_zero(base)) {
195+
/* If the exponent is negative, it divides by 0 */
196+
return is_neg ? BC_RAISE_STATUS_DIVIDE_BY_ZERO : BC_RAISE_STATUS_OK;
77197
}
78-
temp = bc_copy_num(power);
79-
calcscale = pwrscale;
80-
exponent = exponent >> 1;
81198

82-
/* Do the calculation. */
83-
while (exponent > 0) {
84-
pwrscale = 2 * pwrscale;
85-
bc_square_ex(power, &power, pwrscale);
86-
if ((exponent & 1) == 1) {
87-
calcscale = pwrscale + calcscale;
88-
bc_multiply_ex(temp, power, &temp, calcscale);
89-
}
90-
exponent = exponent >> 1;
199+
/* check overflow */
200+
if (UNEXPECTED(base->n_len > SIZE_MAX / exponent)) {
201+
return BC_RAISE_STATUS_LEN_IS_OVERFLOW;
202+
}
203+
if (UNEXPECTED(base->n_scale > SIZE_MAX / exponent)) {
204+
return BC_RAISE_STATUS_SCALE_IS_OVERFLOW;
205+
}
206+
207+
size_t base_len = base->n_len + base->n_scale;
208+
size_t power_len = base->n_len * exponent;
209+
size_t power_scale = base->n_scale * exponent;
210+
211+
/* check overflow */
212+
if (UNEXPECTED(power_len > SIZE_MAX - power_scale)) {
213+
return BC_RAISE_STATUS_FULLLEN_IS_OVERFLOW;
214+
}
215+
size_t power_full_len = power_len + power_scale;
216+
217+
sign power_sign;
218+
if (base->n_sign == MINUS && (exponent & 1) == 1) {
219+
power_sign = MINUS;
220+
} else {
221+
power_sign = PLUS;
222+
}
223+
224+
const char *base_end = base->n_value + base_len - 1;
225+
226+
bc_num power;
227+
if (base_len <= BC_VECTOR_SIZE && power_full_len <= BC_VECTOR_SIZE * 2) {
228+
power = bc_fast_raise(base_end, exponent, base_len, power_len, power_scale, power_full_len);
229+
} else {
230+
power = bc_standard_raise(base->n_value, base_end, exponent, base_len, power_scale);
231+
}
232+
233+
_bc_rm_leading_zeros(power);
234+
if (bc_is_zero(power)) {
235+
power->n_sign = PLUS;
236+
power->n_scale = 0;
237+
} else {
238+
power->n_sign = power_sign;
91239
}
92240

93241
/* Assign the value. */
94242
if (is_neg) {
95-
if (bc_divide(BCG(_one_), temp, result, rscale) == false) {
96-
bc_free_num (&temp);
243+
if (bc_divide(BCG(_one_), power, result, rscale) == false) {
97244
bc_free_num (&power);
98-
return false;
245+
return BC_RAISE_STATUS_DIVIDE_BY_ZERO;
99246
}
100-
bc_free_num (&temp);
247+
bc_free_num (&power);
101248
} else {
102249
bc_free_num (result);
103-
*result = temp;
250+
*result = power;
104251
(*result)->n_scale = MIN(scale, (*result)->n_scale);
105252
}
106-
bc_free_num (&power);
107-
return true;
253+
return BC_RAISE_STATUS_OK;
108254
}
109255

110256
/* This is used internally by BCMath */

0 commit comments

Comments
 (0)