From c50687ef688cb828dc76df4d3b79571d676796f0 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Wed, 11 Dec 2024 21:34:07 +0100 Subject: [PATCH 1/4] Prototype iterator_zip --- Zend/zend_interfaces.c | 7 +- Zend/zend_interfaces.h | 1 + ext/spl/php_spl.stub.php | 2 + ext/spl/php_spl_arginfo.h | 8 +- ext/spl/spl_iterators.c | 270 ++++++++++++++++++++++++++++++++ ext/spl/tests/iterator_zip.phpt | 97 ++++++++++++ 6 files changed, 383 insertions(+), 2 deletions(-) create mode 100644 ext/spl/tests/iterator_zip.phpt diff --git a/Zend/zend_interfaces.c b/Zend/zend_interfaces.c index c0127feabbf68..32e1375efaf34 100644 --- a/Zend/zend_interfaces.c +++ b/Zend/zend_interfaces.c @@ -509,12 +509,17 @@ ZEND_API zend_result zend_create_internal_iterator_zval(zval *return_value, zval return FAILURE; } + zend_create_internal_iterator_iter(return_value, iter); + return SUCCESS; +} + +ZEND_API void zend_create_internal_iterator_iter(zval *return_value, zend_object_iterator *iter) +{ zend_internal_iterator *intern = (zend_internal_iterator *) zend_internal_iterator_create(zend_ce_internal_iterator); intern->iter = iter; intern->iter->index = 0; ZVAL_OBJ(return_value, &intern->std); - return SUCCESS; } static void zend_internal_iterator_free(zend_object *obj) { diff --git a/Zend/zend_interfaces.h b/Zend/zend_interfaces.h index 883e482f510c4..e933ba1588d36 100644 --- a/Zend/zend_interfaces.h +++ b/Zend/zend_interfaces.h @@ -75,6 +75,7 @@ ZEND_API int zend_user_serialize(zval *object, unsigned char **buffer, size_t *b ZEND_API int zend_user_unserialize(zval *object, zend_class_entry *ce, const unsigned char *buf, size_t buf_len, zend_unserialize_data *data); ZEND_API zend_result zend_create_internal_iterator_zval(zval *return_value, zval *obj); +ZEND_API void zend_create_internal_iterator_iter(zval *return_value, zend_object_iterator *iter); END_EXTERN_C() diff --git a/ext/spl/php_spl.stub.php b/ext/spl/php_spl.stub.php index 
d3b5d44f11d1e..bc7e11d256686 100644 --- a/ext/spl/php_spl.stub.php +++ b/ext/spl/php_spl.stub.php @@ -51,3 +51,5 @@ function iterator_apply(Traversable $iterator, callable $callback, ?array $args function iterator_count(iterable $iterator): int {} function iterator_to_array(iterable $iterator, bool $preserve_keys = true): array {} + +function iterator_zip(iterable... $iterators): InternalIterator {} diff --git a/ext/spl/php_spl_arginfo.h b/ext/spl/php_spl_arginfo.h index 68c71fc524bc5..bdc7c075c716d 100644 --- a/ext/spl/php_spl_arginfo.h +++ b/ext/spl/php_spl_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit the .stub.php file instead. - * Stub hash: 21ec2dcca99c85c90afcd319da76016a9f678dc2 */ + * Stub hash: c6e174b137ff27050be4fdc7f3ef5dbfb4b03ee8 */ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_class_implements, 0, 1, MAY_BE_ARRAY|MAY_BE_FALSE) ZEND_ARG_INFO(0, object_or_class) @@ -61,6 +61,10 @@ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_iterator_to_array, 0, 1, IS_ARRA ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, preserve_keys, _IS_BOOL, 0, "true") ZEND_END_ARG_INFO() +ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_iterator_zip, 0, 0, InternalIterator, 0) + ZEND_ARG_VARIADIC_OBJ_TYPE_MASK(0, iterators, Traversable, MAY_BE_ARRAY) +ZEND_END_ARG_INFO() + ZEND_FUNCTION(class_implements); ZEND_FUNCTION(class_parents); ZEND_FUNCTION(class_uses); @@ -76,6 +80,7 @@ ZEND_FUNCTION(spl_object_id); ZEND_FUNCTION(iterator_apply); ZEND_FUNCTION(iterator_count); ZEND_FUNCTION(iterator_to_array); +ZEND_FUNCTION(iterator_zip); static const zend_function_entry ext_functions[] = { ZEND_FE(class_implements, arginfo_class_implements) @@ -93,5 +98,6 @@ static const zend_function_entry ext_functions[] = { ZEND_FE(iterator_apply, arginfo_iterator_apply) ZEND_FE(iterator_count, arginfo_iterator_count) ZEND_FE(iterator_to_array, arginfo_iterator_to_array) + ZEND_FE(iterator_zip, arginfo_iterator_zip) ZEND_FE_END }; diff --git a/ext/spl/spl_iterators.c 
b/ext/spl/spl_iterators.c
index 23eb95c4d7ea5..781cbdec818c7 100644
--- a/ext/spl/spl_iterators.c
+++ b/ext/spl/spl_iterators.c
@@ -3026,6 +3026,298 @@ PHP_FUNCTION(iterator_to_array)
 	spl_iterator_apply(obj, use_keys ? spl_iterator_to_array_apply : spl_iterator_to_values_apply, (void*)return_value);
 }
 /* }}} */
+typedef struct {
+	/* To distinguish betwseen arrays and iterator objects we use the fact that UINT32_MAX
+	 * is not a possible array hash position index. */
+	HashPosition hash_position_or_tag;
+	union {
+		zend_array *array;
+		zend_object_iterator *obj_iter;
+	};
+} spl_zip_iterator_entry;
+
+/* State of the iterator returned by iterator_zip(). */
+typedef struct {
+	zend_object_iterator intern; /* first member: handlers cast zend_object_iterator* to this struct; intern.data caches the value tuple */
+	spl_zip_iterator_entry *iterators; /* iterator_count entries, freed by the dtor handler */
+	zval key_array; /* cached key tuple, UNDEF until a key is first requested */
+	uint32_t iterator_count;
+} spl_zip_iterator;
+
+/* Whether the entry wraps an object iterator (tagged with UINT32_MAX) instead of an array. */
+static zend_always_inline bool spl_zip_iterator_is_obj_entry(const spl_zip_iterator_entry *entry)
+{
+	return entry->hash_position_or_tag == UINT32_MAX;
+}
+
+/* Releases the first `count` entries: destroys object iterators, drops the reference held on arrays. */
+static void spl_iterator_zip_release_entries(spl_zip_iterator_entry *entries, uint32_t count)
+{
+	for (uint32_t i = 0; i < count; i++) {
+		spl_zip_iterator_entry *current = &entries[i];
+		if (spl_zip_iterator_is_obj_entry(current)) {
+			zend_iterator_dtor(current->obj_iter);
+		} else {
+			zend_array_release(current->array);
+		}
+	}
+}
+
+/* dtor handler: releases every sub iterator, both cached tuples and the entry storage. */
+static void spl_iterator_zip_dtor(zend_object_iterator *iter)
+{
+	spl_zip_iterator *zip_iterator = (spl_zip_iterator *) iter;
+	spl_iterator_zip_release_entries(zip_iterator->iterators, zip_iterator->iterator_count);
+	zval_ptr_dtor(&iter->data);
+	/* The cached key tuple is owned by the iterator as well and must be released with it. */
+	zval_ptr_dtor(&zip_iterator->key_array);
+	efree(zip_iterator->iterators);
+}
+
+/* valid handler: SUCCESS only while every sub iterator has a current element; zipping nothing is never valid. */
+static zend_result spl_iterator_zip_valid(zend_object_iterator *iter)
+{
+	spl_zip_iterator *zip_iterator = (spl_zip_iterator *) iter;
+
+	uint32_t i = 0;
+	for (; i < zip_iterator->iterator_count; i++) {
+		spl_zip_iterator_entry *current = &zip_iterator->iterators[i];
+		if (spl_zip_iterator_is_obj_entry(current)) {
+			if (current->obj_iter->funcs->valid(current->obj_iter) != SUCCESS) {
+				return FAILURE;
+			}
+		} else {
+			current->hash_position_or_tag = zend_hash_get_current_pos_ex(current->array, current->hash_position_or_tag);
+			if (current->hash_position_or_tag >= current->array->nNumUsed) {
+				return FAILURE;
+			}
+		}
+	}
+
+	return i > 0 ? SUCCESS : FAILURE;
+}
+
+/* Invariant: returned array is packed and has all UNDEF elements. */
+static zend_array *spl_iterator_zip_reset_array(spl_zip_iterator *zip_iterator, zval *array_zv)
+{
+	/* Reuse array if it's RC1 */
+	if (!Z_ISUNDEF_P(array_zv) && Z_REFCOUNT_P(array_zv) == 1) {
+		zend_array *array = Z_ARR_P(array_zv);
+		if (HT_IS_PACKED(array)
+			&& array->nNumUsed == zip_iterator->iterator_count
+			&& array->nNumOfElements == zip_iterator->iterator_count) {
+			array->nNextFreeElement = zip_iterator->iterator_count;
+			for (uint32_t i = 0; i < zip_iterator->iterator_count; i++) {
+				zval_ptr_dtor(&array->arPacked[i]);
+				ZVAL_UNDEF(&array->arPacked[i]);
+			}
+			return array;
+		}
+	}
+
+	zval_ptr_dtor(array_zv);
+
+	/* Create optimized packed array */
+	zend_array *array = zend_new_array(zip_iterator->iterator_count);
+	zend_hash_real_init_packed(array);
+	array->nNumUsed = array->nNumOfElements = array->nNextFreeElement = zip_iterator->iterator_count;
+	ZVAL_ARR(array_zv, array);
+	return array;
+}
+
+/* get_current_key handler: slot i of the key tuple is sub iterator i's current key; key is set to NULL if one throws. */
+void spl_iterator_zip_get_current_key(zend_object_iterator *iter, zval *key)
+{
+	spl_zip_iterator *zip_iterator = (spl_zip_iterator *) iter;
+
+	zend_array *array = spl_iterator_zip_reset_array(zip_iterator, &zip_iterator->key_array);
+
+	for (uint32_t i = 0; i < zip_iterator->iterator_count; i++) {
+		spl_zip_iterator_entry *current = &zip_iterator->iterators[i];
+		if (spl_zip_iterator_is_obj_entry(current)) {
+			current->obj_iter->funcs->get_current_key(current->obj_iter, &array->arPacked[i]);
+			if (UNEXPECTED(EG(exception))) {
+				ZVAL_NULL(key);
+				return;
+			}
+		} else {
+			zend_hash_get_current_key_zval_ex(current->array, &array->arPacked[i], &current->hash_position_or_tag);
+		}
+	}
+
+	ZVAL_COPY(key, &zip_iterator->key_array);
+}
+
+/* get_current_data handler: fills iter->data with the value tuple; returns NULL, with every slot UNDEF, if a sub iterator yields no data. */
+zval *spl_iterator_zip_get_current_data(zend_object_iterator *iter)
+{
+	spl_zip_iterator *zip_iterator = (spl_zip_iterator *) iter;
+
+	zend_array *array = spl_iterator_zip_reset_array(zip_iterator, &zip_iterator->intern.data);
+
+	for (uint32_t i = 0; i < zip_iterator->iterator_count; i++) {
+		spl_zip_iterator_entry *current = &zip_iterator->iterators[i];
+		zval *data;
+		if (spl_zip_iterator_is_obj_entry(current)) {
+			data = current->obj_iter->funcs->get_current_data(current->obj_iter);
+		} else {
+			data = zend_hash_get_current_data_ex(current->array, &current->hash_position_or_tag);
+		}
+		if (UNEXPECTED(data == NULL)) {
+			for (uint32_t j = 0; j < i; j++) {
+				zval_ptr_dtor(&array->arPacked[j]);
+				ZVAL_UNDEF(&array->arPacked[j]);
+			}
+			return NULL;
+		}
+		ZVAL_COPY(&array->arPacked[i], data);
+	}
+
+	return &iter->data;
+}
+
+/* move_forward handler: advances each sub iterator in order, stopping at the first exception or array that cannot advance. */
+void spl_iterator_zip_move_forward(zend_object_iterator *iter)
+{
+	spl_zip_iterator *zip_iterator = (spl_zip_iterator *) iter;
+
+	for (uint32_t i = 0; i < zip_iterator->iterator_count; i++) {
+		spl_zip_iterator_entry *current = &zip_iterator->iterators[i];
+		if (spl_zip_iterator_is_obj_entry(current)) {
+			current->obj_iter->funcs->move_forward(current->obj_iter);
+			if (UNEXPECTED(EG(exception))) {
+				return;
+			}
+		} else {
+			if (UNEXPECTED(zend_hash_move_forward_ex(current->array, &current->hash_position_or_tag) != SUCCESS)) {
+				return;
+			}
+		}
+	}
+}
+
+/* rewind handler: rewinds each sub iterator; throws if one has no rewind handler and iter->index > 0. */
+void spl_iterator_zip_rewind(zend_object_iterator *iter)
+{
+	spl_zip_iterator *zip_iterator = (spl_zip_iterator *) iter;
+
+	for (uint32_t i = 0; i < zip_iterator->iterator_count; i++) {
+		spl_zip_iterator_entry *current = &zip_iterator->iterators[i];
+		if (spl_zip_iterator_is_obj_entry(current)) {
+			if (current->obj_iter->funcs->rewind) {
+				current->obj_iter->funcs->rewind(current->obj_iter);
+				if (UNEXPECTED(EG(exception))) {
+					return;
+				}
+			} else if (iter->index > 0) {
+				zend_throw_error(NULL, "Iterator does not support rewinding because one or more sub iterators do not support rewinding");
+				return;
+			}
+		} else {
+			zend_hash_internal_pointer_reset_ex(current->array, &current->hash_position_or_tag);
+		}
+	}
+}
+
+static HashTable *spl_iterator_zip_get_gc(zend_object_iterator *iter, zval **table, int *n)
+{
+	spl_zip_iterator *zip_iterator = (spl_zip_iterator *) iter;
+
+	HashTable *ht_slot = NULL;
+
+	// TODO: there can only be one gc_buffer active at a time
+
+	for (uint32_t i = 0; i < zip_iterator->iterator_count; i++) {
+		// TODO: array ????
+		spl_zip_iterator_entry *current = &zip_iterator->iterators[i];
+		if (spl_zip_iterator_is_obj_entry(current)) {
+			if (current->obj_iter->funcs->get_gc) {
+				//HashTable *ht = current->obj_iter->funcs->get_gc(current->obj_iter, tmp_table, tmp_n);
+				if (ht_slot) {
+
+				} else {
+					//ht_slot = ht;
+				}
+			}
+		}
+	}
+
+	*table = NULL;
+	*n = 0;
+
+	return ht_slot;
+}
+
+static const zend_object_iterator_funcs spl_iterator_zip_funcs = {
+	spl_iterator_zip_dtor,
+	spl_iterator_zip_valid,
+	spl_iterator_zip_get_current_data,
+	spl_iterator_zip_get_current_key,
+	spl_iterator_zip_move_forward,
+	spl_iterator_zip_rewind,
+	NULL, /* invalidate_current */ // TODO ???
+	spl_iterator_zip_get_gc, /* get_gc */
+};
+
+// TODO: by ref support ??? (what happens now when we have a ref-returning generator?)
+PHP_FUNCTION(iterator_zip)
+{
+	zval *argv;
+	uint32_t iterator_count;
+
+	ZEND_PARSE_PARAMETERS_START(0, -1)
+		Z_PARAM_VARIADIC('*', argv, iterator_count)
+	ZEND_PARSE_PARAMETERS_END();
+
+	spl_zip_iterator_entry *iterators = safe_emalloc(iterator_count, sizeof(spl_zip_iterator_entry), 0);
+
+	for (uint32_t i = 0; i < iterator_count; i++) {
+		if (UNEXPECTED(!zend_is_iterable(&argv[i]))) {
+			/* Only entries [0, i) have been initialized at this point. */
+			spl_iterator_zip_release_entries(iterators, i);
+			efree(iterators);
+			zend_argument_value_error(i + 1, "must be of type iterable, %s given", zend_zval_value_name(&argv[i]));
+			RETURN_THROWS();
+		}
+
+		if (Z_TYPE(argv[i]) == IS_ARRAY) {
+			iterators[i].hash_position_or_tag = 0;
+			iterators[i].array = Z_ARR(argv[i]);
+			Z_TRY_ADDREF(argv[i]);
+		} else {
+			ZEND_ASSERT(Z_TYPE(argv[i]) == IS_OBJECT);
+
+			zend_class_entry *ce = Z_OBJCE_P(&argv[i]);
+			zend_object_iterator *obj_iter = ce->get_iterator(ce, &argv[i], false);
+			/* get_iterator() can fail (e.g. getIterator() throws): never store a broken iterator. */
+			if (UNEXPECTED(obj_iter == NULL || EG(exception))) {
+				if (obj_iter) {
+					zend_iterator_dtor(obj_iter);
+				}
+				if (!EG(exception)) {
+					zend_throw_error(NULL, "Object of type %s did not create an Iterator", ZSTR_VAL(ce->name));
+				}
+				spl_iterator_zip_release_entries(iterators, i);
+				efree(iterators);
+				RETURN_THROWS();
+			}
+			iterators[i].hash_position_or_tag = UINT32_MAX;
+			iterators[i].obj_iter = obj_iter;
+		}
+	}
+
+	spl_zip_iterator *iterator = emalloc(sizeof(*iterator));
+	zend_iterator_init(&iterator->intern);
+	ZVAL_UNDEF(&iterator->intern.data);
+	ZVAL_UNDEF(&iterator->key_array);
+
+	iterator->intern.funcs = &spl_iterator_zip_funcs;
+	iterator->iterators = iterators;
+	iterator->iterator_count = iterator_count;
+
+	zend_create_internal_iterator_iter(return_value, &iterator->intern);
+}
+
 static int spl_iterator_count_apply(zend_object_iterator *iter, void *puser) /* {{{ */
 {
 	if (UNEXPECTED(*(zend_long*)puser == ZEND_LONG_MAX)) {
diff --git a/ext/spl/tests/iterator_zip.phpt b/ext/spl/tests/iterator_zip.phpt
new file mode 100644
index 0000000000000..61b4f0e0c3dbc
--- /dev/null
+++ b/ext/spl/tests/iterator_zip.phpt
@@ -0,0 +1,97 @@
+--TEST--
+iterator_zip
+--FILE--
+ 1, 'y' => 2, 3 => 3];
+function
gen_with_key($i) { + echo "in gen_with_key\n"; + yield 'a' => $i; + yield 'b' => $i+1; + yield 3 => $i+2; +} + +foreach (iterator_zip(gen_with_key(0), $a) as $key => $val) { + echo "KEYS: ", implode(', ', $key), "\n"; + echo "VALS: ", implode(', ', $val), "\n"; + unset($key); +} + +?> +--EXPECT-- +int(1) +int(4) + +int(2) +int(5) + +int(3) +int(6) + +in gen +in gen +in gen +array(4) { + [0]=> + int(0) + [1]=> + int(3) + [2]=> + int(6) + [3]=> + string(1) "a" +} +array(4) { + [0]=> + int(1) + [1]=> + int(4) + [2]=> + int(7) + [3]=> + string(1) "b" +} +array(4) { + [0]=> + int(2) + [1]=> + int(5) + [2]=> + int(8) + [3]=> + string(1) "c" +} +in gen_with_key +KEYS: a, x +VALS: 0, 1 +KEYS: b, y +VALS: 1, 2 +KEYS: 3, 3 +VALS: 2, 3 From 4d1b0c77377e93bed30be683b5e87981233fe149 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sun, 5 Jan 2025 21:11:31 +0100 Subject: [PATCH 2/4] Don't need get_gc probably --- ext/spl/spl_iterators.c | 31 +------------------------------ 1 file changed, 1 insertion(+), 30 deletions(-) diff --git a/ext/spl/spl_iterators.c b/ext/spl/spl_iterators.c index 781cbdec818c7..41b83273a9b2b 100644 --- a/ext/spl/spl_iterators.c +++ b/ext/spl/spl_iterators.c @@ -3203,35 +3203,6 @@ void spl_iterator_zip_rewind(zend_object_iterator *iter) } } -static HashTable *spl_iterator_zip_get_gc(zend_object_iterator *iter, zval **table, int *n) -{ - spl_zip_iterator *zip_iterator = (spl_zip_iterator *) iter; - - HashTable *ht_slot = NULL; - - // TODO: there can only be one gc_buffer active at a time - - for (uint32_t i = 0; i < zip_iterator->iterator_count; i++) { - // TODO: array ???? 
- spl_zip_iterator_entry *current = &zip_iterator->iterators[i]; - if (spl_zip_iterator_is_obj_entry(current)) { - if (current->obj_iter->funcs->get_gc) { - //HashTable *ht = current->obj_iter->funcs->get_gc(current->obj_iter, tmp_table, tmp_n); - if (ht_slot) { - - } else { - //ht_slot = ht; - } - } - } - } - - *table = NULL; - *n = 0; - - return ht_slot; -} - static const zend_object_iterator_funcs spl_iterator_zip_funcs = { spl_iterator_zip_dtor, spl_iterator_zip_valid, @@ -3240,7 +3211,7 @@ static const zend_object_iterator_funcs spl_iterator_zip_funcs = { spl_iterator_zip_move_forward, spl_iterator_zip_rewind, NULL, /* invalidate_current */ // TODO ??? - spl_iterator_zip_get_gc, /* get_gc */ + NULL, /* get_gc */ }; // TODO: by ref support ??? (what happens now when we have a ref-returning generator?) From b109c762db353b82d28b56e2d1c28f9de7784c0f Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sun, 5 Jan 2025 21:14:39 +0100 Subject: [PATCH 3/4] some ref stuff --- ext/spl/spl_iterators.c | 2 +- ext/spl/tests/iterator_zip.phpt | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/ext/spl/spl_iterators.c b/ext/spl/spl_iterators.c index 41b83273a9b2b..3b29d3958a5b9 100644 --- a/ext/spl/spl_iterators.c +++ b/ext/spl/spl_iterators.c @@ -3214,7 +3214,7 @@ static const zend_object_iterator_funcs spl_iterator_zip_funcs = { NULL, /* get_gc */ }; -// TODO: by ref support ??? (what happens now when we have a ref-returning generator?) +// TODO: by_ref support ??? 
PHP_FUNCTION(iterator_zip) { zval *argv; diff --git a/ext/spl/tests/iterator_zip.phpt b/ext/spl/tests/iterator_zip.phpt index 61b4f0e0c3dbc..2b74f9ae3d2d9 100644 --- a/ext/spl/tests/iterator_zip.phpt +++ b/ext/spl/tests/iterator_zip.phpt @@ -44,6 +44,20 @@ foreach (iterator_zip(gen_with_key(0), $a) as $key => $val) { unset($key); } +function &gen_reference() { + $value = 3; + + while ($value > 0) { + yield $value; + } +} + +foreach (iterator_zip(gen_reference(), gen_reference()) as $vals) { + var_dump($vals); + --$vals[0]; + --$vals[1]; +} + ?> --EXPECT-- int(1) @@ -95,3 +109,21 @@ KEYS: b, y VALS: 1, 2 KEYS: 3, 3 VALS: 2, 3 +array(2) { + [0]=> + &int(3) + [1]=> + &int(3) +} +array(2) { + [0]=> + &int(2) + [1]=> + &int(2) +} +array(2) { + [0]=> + &int(1) + [1]=> + &int(1) +} From d8a3d9f0d10d8b029dd3597028db954a778b29a4 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sun, 5 Jan 2025 21:15:25 +0100 Subject: [PATCH 4/4] typo --- ext/spl/spl_iterators.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/spl/spl_iterators.c b/ext/spl/spl_iterators.c index 3b29d3958a5b9..97bb219b6668c 100644 --- a/ext/spl/spl_iterators.c +++ b/ext/spl/spl_iterators.c @@ -3027,7 +3027,7 @@ PHP_FUNCTION(iterator_to_array) } /* }}} */ typedef struct { - /* To distinguish betwseen arrays and iterator objects we use the fact that UINT32_MAX + /* To distinguish between arrays and iterator objects we use the fact that UINT32_MAX * is not a possible array hash position index. */ HashPosition hash_position_or_tag; union {