@@ -152,20 +152,16 @@ static zend_string *browscap_convert_pattern(zend_string *pattern, int persisten
152
152
size_t i , j = 0 ;
153
153
char * t ;
154
154
zend_string * res ;
155
- char * lc_pattern ;
156
- ALLOCA_FLAG (use_heap );
157
155
158
156
res = zend_string_alloc (browscap_compute_regex_len (pattern ), persistent );
159
157
t = ZSTR_VAL (res );
160
158
161
- lc_pattern = do_alloca (ZSTR_LEN (pattern ) + 1 , use_heap );
162
- zend_str_tolower_copy (lc_pattern , ZSTR_VAL (pattern ), ZSTR_LEN (pattern ));
163
-
164
159
t [j ++ ] = '~' ;
165
160
t [j ++ ] = '^' ;
166
161
167
162
for (i = 0 ; i < ZSTR_LEN (pattern ); i ++ , j ++ ) {
168
- switch (lc_pattern [i ]) {
163
+ char c = ZSTR_VAL (pattern )[i ];
164
+ switch (c ) {
169
165
case '?' :
170
166
t [j ] = '.' ;
171
167
break ;
@@ -198,7 +194,7 @@ static zend_string *browscap_convert_pattern(zend_string *pattern, int persisten
198
194
t [j ] = '+' ;
199
195
break ;
200
196
default :
201
- t [j ] = lc_pattern [ i ] ;
197
+ t [j ] = zend_tolower_ascii ( c ) ;
202
198
break ;
203
199
}
204
200
}
@@ -208,7 +204,6 @@ static zend_string *browscap_convert_pattern(zend_string *pattern, int persisten
208
204
t [j ]= 0 ;
209
205
210
206
ZSTR_LEN (res ) = j ;
211
- free_alloca (lc_pattern , use_heap );
212
207
return res ;
213
208
}
214
209
/* }}} */
@@ -272,27 +267,39 @@ static void browscap_add_kv(
272
267
bdata -> kv_used ++ ;
273
268
}
274
269
270
+ static void browscap_entry_add_kv_to_existing_array (browser_data * bdata , browscap_entry * entry , HashTable * ht ) {
271
+ for (uint32_t i = entry -> kv_start ; i < entry -> kv_end ; i ++ ) {
272
+ zval tmp ;
273
+ ZVAL_STR_COPY (& tmp , bdata -> kv [i ].value );
274
+ zend_hash_add (ht , bdata -> kv [i ].key , & tmp );
275
+ }
276
+ }
277
+
275
278
/* Build a new array describing `entry`: the regex form of its pattern under
 * "browser_name_regex", the raw pattern under "browser_name_pattern", the parent
 * section name under "parent" (only when the entry has one), followed by the
 * entry's own key/value pairs. The caller receives ownership of the array. */
static HashTable *browscap_entry_to_array(browser_data *bdata, browscap_entry *entry) {
	/* Pre-size the table: two fixed keys, optionally "parent", plus one slot per kv pair. */
	HashTable *result = zend_new_array((entry->parent ? 3 : 2) + (entry->kv_end - entry->kv_start));
	zend_string *name;
	zval value;

	/* Each fixed key is created as a temporary string with its hash filled in up front,
	 * inserted with zend_hash_add_new(), and then our own reference is released. */
	name = ZSTR_INIT_LITERAL("browser_name_regex", 0);
	ZSTR_H(name) = zend_inline_hash_func("browser_name_regex", sizeof("browser_name_regex")-1);
	ZVAL_STR(&value, browscap_convert_pattern(entry->pattern, 0));
	zend_hash_add_new(result, name, &value);
	zend_string_release_ex(name, false);

	name = ZSTR_INIT_LITERAL("browser_name_pattern", 0);
	ZSTR_H(name) = zend_inline_hash_func("browser_name_pattern", sizeof("browser_name_pattern")-1);
	ZVAL_STR_COPY(&value, entry->pattern);
	zend_hash_add_new(result, name, &value);
	zend_string_release_ex(name, false);

	if (entry->parent != NULL) {
		name = ZSTR_INIT_LITERAL("parent", 0);
		ZSTR_H(name) = zend_inline_hash_func("parent", sizeof("parent")-1);
		ZVAL_STR_COPY(&value, entry->parent);
		zend_hash_add_new(result, name, &value);
		zend_string_release_ex(name, false);
	}

	/* The entry's own properties go in last. */
	browscap_entry_add_kv_to_existing_array(bdata, entry, result);

	return result;
}
@@ -542,31 +549,89 @@ static inline size_t browscap_get_minimum_length(browscap_entry *entry) {
542
549
return len ;
543
550
}
544
551
545
- static int browser_reg_compare ( browscap_entry * entry , zend_string * agent_name , browscap_entry * * found_entry_ptr ) /* {{{ */
552
/* Match the byte range [s, s_end) against the glob-style pattern [pattern, pattern_end).
 *
 * In the pattern, '*' matches any run of bytes (including an empty one), '?' matches
 * exactly one byte, and every other byte has to match literally. The comparison is
 * byte-exact, so any case folding must be done by the caller beforehand. Only bytes
 * inside the two ranges are read; neither range needs to be NUL-terminated.
 *
 * Returns true when the whole of s is matched by the whole pattern, false otherwise. */
static bool browscap_match_string_wildcard(const char *s, const char *s_end, const char *pattern, const char *pattern_end)
{
	const char *pattern_current = pattern;
	const char *s_current = s;

	const char *wildcard_pattern_restore_pos = NULL;
	const char *wildcard_s_restore_pos = NULL;

	/* An empty pattern matches only an empty string. Deciding this up front guarantees
	 * pattern_current < pattern_end every time the loop below dereferences it; without
	 * it the loop would read the byte one past the pattern (and could keep walking
	 * beyond it if that byte happened to equal the current byte of s). */
	if (pattern == pattern_end) {
		return s == s_end;
	}

	while (s_current < s_end) {
		char pattern_char = *pattern_current;
		char s_char = *s_current;

		if (pattern_char == '*') {
			/* Collapse wildcards */
			pattern_current++;
			while (pattern_current < pattern_end && *pattern_current == '*') {
				pattern_current++;
			}

			/* If we're at the end of the pattern, it means that the ending was just '*', so this is a trivial match */
			if (pattern_current == pattern_end) {
				return true;
			}

			/* Optimization: if there is a non-wildcard character X after a *, then we can immediately jump to the first
			 * character X in s starting from s_current because it is the only way to match beyond the *. */
			if (*pattern_current != '?') {
				while (s_current < s_end && *s_current != *pattern_current) {
					s_current++;
				}
			}

			/* We will first assume the skipped part by * is a 0-length string (or n-length if the optimization above skipped n characters).
			 * When a mismatch happens we will backtrack and move s one position to assume * skipped a 1-length string.
			 * Then 2, 3, 4, ... */
			wildcard_pattern_restore_pos = pattern_current;
			wildcard_s_restore_pos = s_current;

			continue;
		} else if (pattern_char == s_char || pattern_char == '?') {
			/* Match */
			pattern_current++;
			s_current++;

			/* If this was the last character of the pattern, we either fully matched s, or we have a mismatch */
			if (pattern_current == pattern_end) {
				if (s_current == s_end) {
					return true;
				}
				/* Fallthrough to mismatch */
			} else {
				continue;
			}
		}

		/* Mismatch */
		if (wildcard_pattern_restore_pos) {
			/* Retry from the most recent '*', letting it swallow one more byte of s. */
			pattern_current = wildcard_pattern_restore_pos;
			wildcard_s_restore_pos++;
			s_current = wildcard_s_restore_pos;
		} else {
			/* No wildcard is active, so it is impossible to match */
			return false;
		}
	}

	/* Skip remaining * wildcards, they match nothing here as we are at the end of s */
	while (pattern_current < pattern_end && *pattern_current == '*') {
		pattern_current++;
	}

	ZEND_ASSERT(s_current == s_end);
	return pattern_current == pattern_end;
}
626
+
627
+ static int browser_reg_compare (browscap_entry * entry , zend_string * agent_name , browscap_entry * * found_entry_ptr , size_t * cached_prev_len ) /* {{{ */
628
+ {
629
+ browscap_entry * found_entry = * found_entry_ptr ;
630
+ ALLOCA_FLAG (use_heap )
631
+ zend_string * pattern_lc ;
632
+ const char * cur ;
633
+ int i ;
634
+
570
635
/* Lowercase the pattern, the agent name is already lowercase */
571
636
ZSTR_ALLOCA_ALLOC (pattern_lc , ZSTR_LEN (entry -> pattern ), use_heap );
572
637
zend_str_tolower_copy (ZSTR_VAL (pattern_lc ), ZSTR_VAL (entry -> pattern ), ZSTR_LEN (entry -> pattern ));
@@ -590,91 +655,52 @@ static int browser_reg_compare(browscap_entry *entry, zend_string *agent_name, b
590
655
/* See if we have an exact match, if so, we're done... */
591
656
if (zend_string_equals (agent_name , pattern_lc )) {
592
657
* found_entry_ptr = entry ;
658
+ /* cached_prev_len doesn't matter here because we end the search when an exact match is found. */
593
659
ZSTR_ALLOCA_FREE (pattern_lc , use_heap );
594
660
return 1 ;
595
661
}
596
662
597
- regex = browscap_convert_pattern (entry -> pattern , 0 );
598
- re = pcre_get_compiled_regex (regex , & capture_count );
599
- if (re == NULL ) {
600
- ZSTR_ALLOCA_FREE (pattern_lc , use_heap );
601
- zend_string_release (regex );
602
- return 0 ;
603
- }
604
-
605
- match_data = php_pcre_create_match_data (capture_count , re );
606
- if (!match_data ) {
607
- ZSTR_ALLOCA_FREE (pattern_lc , use_heap );
608
- zend_string_release (regex );
609
- return 0 ;
610
- }
611
- rc = pcre2_match (re , (PCRE2_SPTR )ZSTR_VAL (agent_name ), ZSTR_LEN (agent_name ), 0 , 0 , match_data , php_pcre_mctx ());
612
- php_pcre_free_match_data (match_data );
613
- if (rc >= 0 ) {
663
+ if (browscap_match_string_wildcard (
664
+ ZSTR_VAL (agent_name ) + entry -> prefix_len ,
665
+ ZSTR_VAL (agent_name ) + ZSTR_LEN (agent_name ),
666
+ ZSTR_VAL (pattern_lc ) + entry -> prefix_len ,
667
+ ZSTR_VAL (pattern_lc ) + ZSTR_LEN (pattern_lc )
668
+ )) {
614
669
/* If we've found a possible browser, we need to do a comparison of the
615
670
number of characters changed in the user agent being checked versus
616
671
the previous match found and the current match. */
617
- if (found_entry ) {
618
- size_t i , prev_len = 0 , curr_len = 0 ;
619
- zend_string * previous_match = found_entry -> pattern ;
620
- zend_string * current_match = entry -> pattern ;
621
-
622
- for (i = 0 ; i < ZSTR_LEN (previous_match ); i ++ ) {
623
- switch (ZSTR_VAL (previous_match )[i ]) {
624
- case '?' :
625
- case '*' :
626
- /* do nothing, ignore these characters in the count */
627
- break ;
628
-
629
- default :
630
- ++ prev_len ;
631
- }
632
- }
633
-
634
- for (i = 0 ; i < ZSTR_LEN (current_match ); i ++ ) {
635
- switch (ZSTR_VAL (current_match )[i ]) {
636
- case '?' :
637
- case '*' :
638
- /* do nothing, ignore these characters in the count */
639
- break ;
672
+ size_t curr_len = entry -> prefix_len ; /* Start from the prefix because the prefix is free of wildcards */
673
+ zend_string * current_match = entry -> pattern ;
674
+ for (size_t i = curr_len ; i < ZSTR_LEN (current_match ); i ++ ) {
675
+ switch (ZSTR_VAL (current_match )[i ]) {
676
+ case '?' :
677
+ case '*' :
678
+ /* do nothing, ignore these characters in the count */
679
+ break ;
640
680
641
- default :
642
- ++ curr_len ;
643
- }
681
+ default :
682
+ ++ curr_len ;
644
683
}
684
+ }
645
685
686
+ if (found_entry ) {
646
687
/* Pick which browser pattern replaces the least amount of
647
688
characters when compared to the original user agent string... */
648
- if (prev_len < curr_len ) {
689
+ if (* cached_prev_len < curr_len ) {
649
690
* found_entry_ptr = entry ;
691
+ * cached_prev_len = curr_len ;
650
692
}
651
693
} else {
652
694
* found_entry_ptr = entry ;
695
+ * cached_prev_len = curr_len ;
653
696
}
654
697
}
655
698
656
699
ZSTR_ALLOCA_FREE (pattern_lc , use_heap );
657
- zend_string_release (regex );
658
700
return 0 ;
659
701
}
660
702
/* }}} */
661
703
662
- static void browscap_zval_copy_ctor (zval * p ) /* {{{ */
663
- {
664
- if (Z_REFCOUNTED_P (p )) {
665
- zend_string * str ;
666
-
667
- ZEND_ASSERT (Z_TYPE_P (p ) == IS_STRING );
668
- str = Z_STR_P (p );
669
- if (!(GC_FLAGS (str ) & GC_PERSISTENT )) {
670
- GC_ADDREF (str );
671
- } else {
672
- ZVAL_NEW_STR (p , zend_string_init (ZSTR_VAL (str ), ZSTR_LEN (str ), 0 ));
673
- }
674
- }
675
- }
676
- /* }}} */
677
-
678
704
/* {{{ Get information about the capabilities of a browser. If browser_name is omitted or null, HTTP_USER_AGENT is used. Returns an object by default; if return_array is true, returns an array. */
679
705
PHP_FUNCTION (get_browser )
680
706
{
@@ -724,9 +750,31 @@ PHP_FUNCTION(get_browser)
724
750
found_entry = zend_hash_find_ptr (bdata -> htab , lookup_browser_name );
725
751
if (found_entry == NULL ) {
726
752
browscap_entry * entry ;
753
+ size_t cached_prev_len = 0 ; /* silence compiler warning */
754
+
755
+ ZEND_HASH_MAP_FOREACH_PTR (bdata -> htab , entry ) {
756
+ /* The following two early-skip checks are inside this loop instead of inside browser_reg_compare().
757
+ * That's because we want to avoid the call frame overhead, especially as browser_reg_compare() is
758
+ * a function that uses alloca(). */
759
+
760
+ /* Agent name too short */
761
+ if (ZSTR_LEN (lookup_browser_name ) < browscap_get_minimum_length (entry )) {
762
+ continue ;
763
+ }
764
+
765
+ /* Quickly discard patterns where the prefix doesn't match. */
766
+ bool prefix_matches = true;
767
+ for (size_t i = 0 ; i < entry -> prefix_len ; i ++ ) {
768
+ if (ZSTR_VAL (lookup_browser_name )[i ] != zend_tolower_ascii (ZSTR_VAL (entry -> pattern )[i ])) {
769
+ prefix_matches = false;
770
+ break ;
771
+ }
772
+ }
773
+ if (!prefix_matches ) {
774
+ continue ;
775
+ }
727
776
728
- ZEND_HASH_FOREACH_PTR (bdata -> htab , entry ) {
729
- if (browser_reg_compare (entry , lookup_browser_name , & found_entry )) {
777
+ if (browser_reg_compare (entry , lookup_browser_name , & found_entry , & cached_prev_len )) {
730
778
break ;
731
779
}
732
780
} ZEND_HASH_FOREACH_END ();
@@ -735,12 +783,14 @@ PHP_FUNCTION(get_browser)
735
783
found_entry = zend_hash_str_find_ptr (bdata -> htab ,
736
784
DEFAULT_SECTION_NAME , sizeof (DEFAULT_SECTION_NAME )- 1 );
737
785
if (found_entry == NULL ) {
738
- zend_string_release (lookup_browser_name );
786
+ zend_string_release_ex (lookup_browser_name , false );
739
787
RETURN_FALSE ;
740
788
}
741
789
}
742
790
}
743
791
792
+ zend_string_release_ex (lookup_browser_name , false);
793
+
744
794
agent_ht = browscap_entry_to_array (bdata , found_entry );
745
795
746
796
if (return_array ) {
@@ -749,23 +799,15 @@ PHP_FUNCTION(get_browser)
749
799
object_and_properties_init (return_value , zend_standard_class_def , agent_ht );
750
800
}
751
801
802
+ HashTable * target_ht = return_array ? Z_ARRVAL_P (return_value ) : Z_OBJPROP_P (return_value );
803
+
752
804
while (found_entry -> parent ) {
753
805
found_entry = zend_hash_find_ptr (bdata -> htab , found_entry -> parent );
754
806
if (found_entry == NULL ) {
755
807
break ;
756
808
}
757
809
758
- agent_ht = browscap_entry_to_array (bdata , found_entry );
759
- if (return_array ) {
760
- zend_hash_merge (Z_ARRVAL_P (return_value ), agent_ht , (copy_ctor_func_t ) browscap_zval_copy_ctor , 0 );
761
- } else {
762
- zend_hash_merge (Z_OBJPROP_P (return_value ), agent_ht , (copy_ctor_func_t ) browscap_zval_copy_ctor , 0 );
763
- }
764
-
765
- zend_hash_destroy (agent_ht );
766
- efree (agent_ht );
810
+ browscap_entry_add_kv_to_existing_array (bdata , found_entry , target_ht );
767
811
}
768
-
769
- zend_string_release_ex (lookup_browser_name , 0 );
770
812
}
771
813
/* }}} */
0 commit comments