Skip to content

Commit f150f99

Browse files
authored
Improve browscap get_browser performance (php#12651)
* Split function and use _new variant to avoid redundant checks * Precompute better array size to avoid rehashing * Use new function to add into array instead of merging into, preventing temporary memory allocations * Convert to regex without separate copy + lowering We're already doing a character-wise loop after lowering, so just lower it character by character instead of looping over it twice and allocating memory. * Use HASH_MAP loop because htab can never be packed This saves additional checks. * Move destructor to more sensible place * Remove now unused browscap_zval_copy_ctor * Use zend_string_release_ex variant where possible * Implement dedicated greedy wildcard matching algorithm This avoids compiling, allocating and caching regexes and should run in the same complexity. * Cache previous length instead of repeatedly recomputing it * Add additional optimization to wildcard * matching * Move cheap checks to the callsite The function prologue and epilogue have a stupidly high overhead for those 2 simple checks at the start. We can't always-inline the reg_compare function because it contains alloca, and the alloca is really important for performance. Instead, move those cheap checks to the call site. * Use specialised loop to avoid unnecessary conversions and checks * Optimize counting loop by taking into account the prefix * Precompute the hash values of known keys * [ci skip] UPGRADING * Code style * Add a note why we have the early-skip checks in the loop
1 parent 299a234 commit f150f99

File tree

2 files changed

+155
-111
lines changed

2 files changed

+155
-111
lines changed

UPGRADING

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,3 +197,5 @@ PHP 8.4 UPGRADE NOTES
197197
They now run in linear time instead of being bounded by quadratic time.
198198

199199
* mb_strcut() is much faster now for UTF-8 and UTF-16 strings.
200+
201+
* get_browser() is much faster now, up to 1.5x - 2.5x for some test cases.

ext/standard/browscap.c

Lines changed: 153 additions & 111 deletions
Original file line numberDiff line numberDiff line change
@@ -152,20 +152,16 @@ static zend_string *browscap_convert_pattern(zend_string *pattern, int persisten
152152
size_t i, j=0;
153153
char *t;
154154
zend_string *res;
155-
char *lc_pattern;
156-
ALLOCA_FLAG(use_heap);
157155

158156
res = zend_string_alloc(browscap_compute_regex_len(pattern), persistent);
159157
t = ZSTR_VAL(res);
160158

161-
lc_pattern = do_alloca(ZSTR_LEN(pattern) + 1, use_heap);
162-
zend_str_tolower_copy(lc_pattern, ZSTR_VAL(pattern), ZSTR_LEN(pattern));
163-
164159
t[j++] = '~';
165160
t[j++] = '^';
166161

167162
for (i = 0; i < ZSTR_LEN(pattern); i++, j++) {
168-
switch (lc_pattern[i]) {
163+
char c = ZSTR_VAL(pattern)[i];
164+
switch (c) {
169165
case '?':
170166
t[j] = '.';
171167
break;
@@ -198,7 +194,7 @@ static zend_string *browscap_convert_pattern(zend_string *pattern, int persisten
198194
t[j] = '+';
199195
break;
200196
default:
201-
t[j] = lc_pattern[i];
197+
t[j] = zend_tolower_ascii(c);
202198
break;
203199
}
204200
}
@@ -208,7 +204,6 @@ static zend_string *browscap_convert_pattern(zend_string *pattern, int persisten
208204
t[j]=0;
209205

210206
ZSTR_LEN(res) = j;
211-
free_alloca(lc_pattern, use_heap);
212207
return res;
213208
}
214209
/* }}} */
@@ -272,27 +267,39 @@ static void browscap_add_kv(
272267
bdata->kv_used++;
273268
}
274269

270+
static void browscap_entry_add_kv_to_existing_array(browser_data *bdata, browscap_entry *entry, HashTable *ht) {
271+
for (uint32_t i = entry->kv_start; i < entry->kv_end; i++) {
272+
zval tmp;
273+
ZVAL_STR_COPY(&tmp, bdata->kv[i].value);
274+
zend_hash_add(ht, bdata->kv[i].key, &tmp);
275+
}
276+
}
277+
275278
static HashTable *browscap_entry_to_array(browser_data *bdata, browscap_entry *entry) {
276279
zval tmp;
277-
uint32_t i;
278-
279-
HashTable *ht = zend_new_array(8);
280+
HashTable *ht = zend_new_array(2 + (entry->parent ? 1 : 0) + (entry->kv_end - entry->kv_start));
280281

281282
ZVAL_STR(&tmp, browscap_convert_pattern(entry->pattern, 0));
282-
zend_hash_str_add(ht, "browser_name_regex", sizeof("browser_name_regex")-1, &tmp);
283+
zend_string *key = ZSTR_INIT_LITERAL("browser_name_regex", 0);
284+
ZSTR_H(key) = zend_inline_hash_func("browser_name_regex", sizeof("browser_name_regex")-1);
285+
zend_hash_add_new(ht, key, &tmp);
286+
zend_string_release_ex(key, false);
283287

284288
ZVAL_STR_COPY(&tmp, entry->pattern);
285-
zend_hash_str_add(ht, "browser_name_pattern", sizeof("browser_name_pattern")-1, &tmp);
289+
key = ZSTR_INIT_LITERAL("browser_name_pattern", 0);
290+
ZSTR_H(key) = zend_inline_hash_func("browser_name_pattern", sizeof("browser_name_pattern")-1);
291+
zend_hash_add_new(ht, key, &tmp);
292+
zend_string_release_ex(key, false);
286293

287294
if (entry->parent) {
288295
ZVAL_STR_COPY(&tmp, entry->parent);
289-
zend_hash_str_add(ht, "parent", sizeof("parent")-1, &tmp);
296+
key = ZSTR_INIT_LITERAL("parent", 0);
297+
ZSTR_H(key) = zend_inline_hash_func("parent", sizeof("parent")-1);
298+
zend_hash_add_new(ht, key, &tmp);
299+
zend_string_release_ex(key, false);
290300
}
291301

292-
for (i = entry->kv_start; i < entry->kv_end; i++) {
293-
ZVAL_STR_COPY(&tmp, bdata->kv[i].value);
294-
zend_hash_add(ht, bdata->kv[i].key, &tmp);
295-
}
302+
browscap_entry_add_kv_to_existing_array(bdata, entry, ht);
296303

297304
return ht;
298305
}
@@ -542,31 +549,89 @@ static inline size_t browscap_get_minimum_length(browscap_entry *entry) {
542549
return len;
543550
}
544551

545-
static int browser_reg_compare(browscap_entry *entry, zend_string *agent_name, browscap_entry **found_entry_ptr) /* {{{ */
552+
static bool browscap_match_string_wildcard(const char *s, const char *s_end, const char *pattern, const char *pattern_end)
546553
{
547-
browscap_entry *found_entry = *found_entry_ptr;
548-
ALLOCA_FLAG(use_heap)
549-
zend_string *pattern_lc, *regex;
550-
const char *cur;
551-
int i;
554+
const char *pattern_current = pattern;
555+
const char *s_current = s;
556+
557+
const char *wildcard_pattern_restore_pos = NULL;
558+
const char *wildcard_s_restore_pos = NULL;
559+
560+
while (s_current < s_end) {
561+
char pattern_char = *pattern_current;
562+
char s_char = *s_current;
563+
564+
if (pattern_char == '*') {
565+
/* Collapse wildcards */
566+
pattern_current++;
567+
while (pattern_current < pattern_end && *pattern_current == '*') {
568+
pattern_current++;
569+
}
552570

553-
pcre2_code *re;
554-
pcre2_match_data *match_data;
555-
uint32_t capture_count;
556-
int rc;
571+
/* If we're at the end of the pattern, it means that the ending was just '*', so this is a trivial match */
572+
if (pattern_current == pattern_end) {
573+
return true;
574+
}
575+
576+
/* Optimization: if there is a non-wildcard character X after a *, then we can immediately jump to the first
577+
* character X in s starting from s_current because it is the only way to match beyond the *. */
578+
if (*pattern_current != '?') {
579+
while (s_current < s_end && *s_current != *pattern_current) {
580+
s_current++;
581+
}
582+
}
583+
584+
/* We will first assume the skipped part by * is a 0-length string (or n-length if the optimization above skipped n characters).
585+
* When a mismatch happens we will backtrack and move s one position to assume * skipped a 1-length string.
586+
* Then 2, 3, 4, ... */
587+
wildcard_pattern_restore_pos = pattern_current;
588+
wildcard_s_restore_pos = s_current;
589+
590+
continue;
591+
} else if (pattern_char == s_char || pattern_char == '?') {
592+
/* Match */
593+
pattern_current++;
594+
s_current++;
595+
596+
/* If this was the last character of the pattern, we either fully matched s, or we have a mismatch */
597+
if (pattern_current == pattern_end) {
598+
if (s_current == s_end) {
599+
return true;
600+
}
601+
/* Fallthrough to mismatch */
602+
} else {
603+
continue;
604+
}
605+
}
557606

558-
/* Agent name too short */
559-
if (ZSTR_LEN(agent_name) < browscap_get_minimum_length(entry)) {
560-
return 0;
607+
/* Mismatch */
608+
if (wildcard_pattern_restore_pos) {
609+
pattern_current = wildcard_pattern_restore_pos;
610+
wildcard_s_restore_pos++;
611+
s_current = wildcard_s_restore_pos;
612+
} else {
613+
/* No wildcard is active, so it is impossible to match */
614+
return false;
615+
}
561616
}
562617

563-
/* Quickly discard patterns where the prefix doesn't match. */
564-
if (zend_binary_strcasecmp(
565-
ZSTR_VAL(agent_name), entry->prefix_len,
566-
ZSTR_VAL(entry->pattern), entry->prefix_len) != 0) {
567-
return 0;
618+
/* Skip remaining * wildcards, they match nothing here as we are at the end of s */
619+
while (pattern_current < pattern_end && *pattern_current == '*') {
620+
pattern_current++;
568621
}
569622

623+
ZEND_ASSERT(s_current == s_end);
624+
return pattern_current == pattern_end;
625+
}
626+
627+
static int browser_reg_compare(browscap_entry *entry, zend_string *agent_name, browscap_entry **found_entry_ptr, size_t *cached_prev_len) /* {{{ */
628+
{
629+
browscap_entry *found_entry = *found_entry_ptr;
630+
ALLOCA_FLAG(use_heap)
631+
zend_string *pattern_lc;
632+
const char *cur;
633+
int i;
634+
570635
/* Lowercase the pattern, the agent name is already lowercase */
571636
ZSTR_ALLOCA_ALLOC(pattern_lc, ZSTR_LEN(entry->pattern), use_heap);
572637
zend_str_tolower_copy(ZSTR_VAL(pattern_lc), ZSTR_VAL(entry->pattern), ZSTR_LEN(entry->pattern));
@@ -590,91 +655,52 @@ static int browser_reg_compare(browscap_entry *entry, zend_string *agent_name, b
590655
/* See if we have an exact match, if so, we're done... */
591656
if (zend_string_equals(agent_name, pattern_lc)) {
592657
*found_entry_ptr = entry;
658+
/* cached_prev_len doesn't matter here because we end the search when an exact match is found. */
593659
ZSTR_ALLOCA_FREE(pattern_lc, use_heap);
594660
return 1;
595661
}
596662

597-
regex = browscap_convert_pattern(entry->pattern, 0);
598-
re = pcre_get_compiled_regex(regex, &capture_count);
599-
if (re == NULL) {
600-
ZSTR_ALLOCA_FREE(pattern_lc, use_heap);
601-
zend_string_release(regex);
602-
return 0;
603-
}
604-
605-
match_data = php_pcre_create_match_data(capture_count, re);
606-
if (!match_data) {
607-
ZSTR_ALLOCA_FREE(pattern_lc, use_heap);
608-
zend_string_release(regex);
609-
return 0;
610-
}
611-
rc = pcre2_match(re, (PCRE2_SPTR)ZSTR_VAL(agent_name), ZSTR_LEN(agent_name), 0, 0, match_data, php_pcre_mctx());
612-
php_pcre_free_match_data(match_data);
613-
if (rc >= 0) {
663+
if (browscap_match_string_wildcard(
664+
ZSTR_VAL(agent_name) + entry->prefix_len,
665+
ZSTR_VAL(agent_name) + ZSTR_LEN(agent_name),
666+
ZSTR_VAL(pattern_lc) + entry->prefix_len,
667+
ZSTR_VAL(pattern_lc) + ZSTR_LEN(pattern_lc)
668+
)) {
614669
/* If we've found a possible browser, we need to do a comparison of the
615670
number of characters changed in the user agent being checked versus
616671
the previous match found and the current match. */
617-
if (found_entry) {
618-
size_t i, prev_len = 0, curr_len = 0;
619-
zend_string *previous_match = found_entry->pattern;
620-
zend_string *current_match = entry->pattern;
621-
622-
for (i = 0; i < ZSTR_LEN(previous_match); i++) {
623-
switch (ZSTR_VAL(previous_match)[i]) {
624-
case '?':
625-
case '*':
626-
/* do nothing, ignore these characters in the count */
627-
break;
628-
629-
default:
630-
++prev_len;
631-
}
632-
}
633-
634-
for (i = 0; i < ZSTR_LEN(current_match); i++) {
635-
switch (ZSTR_VAL(current_match)[i]) {
636-
case '?':
637-
case '*':
638-
/* do nothing, ignore these characters in the count */
639-
break;
672+
size_t curr_len = entry->prefix_len; /* Start from the prefix because the prefix is free of wildcards */
673+
zend_string *current_match = entry->pattern;
674+
for (size_t i = curr_len; i < ZSTR_LEN(current_match); i++) {
675+
switch (ZSTR_VAL(current_match)[i]) {
676+
case '?':
677+
case '*':
678+
/* do nothing, ignore these characters in the count */
679+
break;
640680

641-
default:
642-
++curr_len;
643-
}
681+
default:
682+
++curr_len;
644683
}
684+
}
645685

686+
if (found_entry) {
646687
/* Pick which browser pattern replaces the least amount of
647688
characters when compared to the original user agent string... */
648-
if (prev_len < curr_len) {
689+
if (*cached_prev_len < curr_len) {
649690
*found_entry_ptr = entry;
691+
*cached_prev_len = curr_len;
650692
}
651693
} else {
652694
*found_entry_ptr = entry;
695+
*cached_prev_len = curr_len;
653696
}
654697
}
655698

656699
ZSTR_ALLOCA_FREE(pattern_lc, use_heap);
657-
zend_string_release(regex);
658700
return 0;
659701
}
660702
/* }}} */
661703

662-
static void browscap_zval_copy_ctor(zval *p) /* {{{ */
663-
{
664-
if (Z_REFCOUNTED_P(p)) {
665-
zend_string *str;
666-
667-
ZEND_ASSERT(Z_TYPE_P(p) == IS_STRING);
668-
str = Z_STR_P(p);
669-
if (!(GC_FLAGS(str) & GC_PERSISTENT)) {
670-
GC_ADDREF(str);
671-
} else {
672-
ZVAL_NEW_STR(p, zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), 0));
673-
}
674-
}
675-
}
676-
/* }}} */
677-
678704
/* {{{ Get information about the capabilities of a browser. If browser_name is omitted or null, HTTP_USER_AGENT is used. Returns an object by default; if return_array is true, returns an array. */
679705
PHP_FUNCTION(get_browser)
680706
{
@@ -724,9 +750,31 @@ PHP_FUNCTION(get_browser)
724750
found_entry = zend_hash_find_ptr(bdata->htab, lookup_browser_name);
725751
if (found_entry == NULL) {
726752
browscap_entry *entry;
753+
size_t cached_prev_len = 0; /* silence compiler warning */
754+
755+
ZEND_HASH_MAP_FOREACH_PTR(bdata->htab, entry) {
756+
/* The following two early-skip checks are inside this loop instead of inside browser_reg_compare().
757+
* That's because we want to avoid the call frame overhead, especially as browser_reg_compare() is
758+
* a function that uses alloca(). */
759+
760+
/* Agent name too short */
761+
if (ZSTR_LEN(lookup_browser_name) < browscap_get_minimum_length(entry)) {
762+
continue;
763+
}
764+
765+
/* Quickly discard patterns where the prefix doesn't match. */
766+
bool prefix_matches = true;
767+
for (size_t i = 0; i < entry->prefix_len; i++) {
768+
if (ZSTR_VAL(lookup_browser_name)[i] != zend_tolower_ascii(ZSTR_VAL(entry->pattern)[i])) {
769+
prefix_matches = false;
770+
break;
771+
}
772+
}
773+
if (!prefix_matches) {
774+
continue;
775+
}
727776

728-
ZEND_HASH_FOREACH_PTR(bdata->htab, entry) {
729-
if (browser_reg_compare(entry, lookup_browser_name, &found_entry)) {
777+
if (browser_reg_compare(entry, lookup_browser_name, &found_entry, &cached_prev_len)) {
730778
break;
731779
}
732780
} ZEND_HASH_FOREACH_END();
@@ -735,12 +783,14 @@ PHP_FUNCTION(get_browser)
735783
found_entry = zend_hash_str_find_ptr(bdata->htab,
736784
DEFAULT_SECTION_NAME, sizeof(DEFAULT_SECTION_NAME)-1);
737785
if (found_entry == NULL) {
738-
zend_string_release(lookup_browser_name);
786+
zend_string_release_ex(lookup_browser_name, false);
739787
RETURN_FALSE;
740788
}
741789
}
742790
}
743791

792+
zend_string_release_ex(lookup_browser_name, false);
793+
744794
agent_ht = browscap_entry_to_array(bdata, found_entry);
745795

746796
if (return_array) {
@@ -749,23 +799,15 @@ PHP_FUNCTION(get_browser)
749799
object_and_properties_init(return_value, zend_standard_class_def, agent_ht);
750800
}
751801

802+
HashTable *target_ht = return_array ? Z_ARRVAL_P(return_value) : Z_OBJPROP_P(return_value);
803+
752804
while (found_entry->parent) {
753805
found_entry = zend_hash_find_ptr(bdata->htab, found_entry->parent);
754806
if (found_entry == NULL) {
755807
break;
756808
}
757809

758-
agent_ht = browscap_entry_to_array(bdata, found_entry);
759-
if (return_array) {
760-
zend_hash_merge(Z_ARRVAL_P(return_value), agent_ht, (copy_ctor_func_t) browscap_zval_copy_ctor, 0);
761-
} else {
762-
zend_hash_merge(Z_OBJPROP_P(return_value), agent_ht, (copy_ctor_func_t) browscap_zval_copy_ctor, 0);
763-
}
764-
765-
zend_hash_destroy(agent_ht);
766-
efree(agent_ht);
810+
browscap_entry_add_kv_to_existing_array(bdata, found_entry, target_ht);
767811
}
768-
769-
zend_string_release_ex(lookup_browser_name, 0);
770812
}
771813
/* }}} */

0 commit comments

Comments
 (0)