@@ -31358,7 +31358,7 @@ my_uca_can_be_contraction_part(const MY_CONTRACTIONS *c, my_wc_t wc, int flag)
31358
31358
@retval ptr - contraction weight array
31359
31359
*/
31360
31360
31361
- uint16 *
31361
+ const uint16 *
31362
31362
my_uca_contraction2_weight(const MY_CONTRACTIONS *list, my_wc_t wc1, my_wc_t wc2)
31363
31363
{
31364
31364
MY_CONTRACTION *c, *last;
@@ -31443,13 +31443,29 @@ my_uca_needs_context_handling(const MY_UCA_WEIGHT_LEVEL *level, my_wc_t wc)
31443
31443
@retval non-zero - strings are different
31444
31444
*/
31445
31445
31446
- static int
31447
- my_wmemcmp(my_wc_t *a, my_wc_t *b, size_t len)
31446
+ static inline int
31447
+ my_wmemcmp(const my_wc_t *a, const my_wc_t *b, size_t len)
31448
31448
{
31449
31449
return memcmp(a, b, len * sizeof(my_wc_t));
31450
31450
}
31451
31451
31452
31452
31453
+ /*
31454
+ Test if the MY_CONTRACTION instance is equal to the wide
31455
+ string with the given length.
31456
+ Note: only true contractions are compared;
31457
+ for previous-context pairs the result is always FALSE.
31458
+ */
31459
+ static inline my_bool
31460
+ my_uca_true_contraction_eq(const MY_CONTRACTION *c,
31461
+ const my_wc_t *wc, size_t len)
31462
+ {
31463
+ return (len >= MY_UCA_MAX_CONTRACTION || c->ch[len] == 0) &&
31464
+ !c->with_context &&
31465
+ !my_wmemcmp(c->ch, wc, len);
31466
+ }
31467
+
31468
+
31453
31469
/**
31454
31470
Check if a string is a contraction,
31455
31471
and return its weight array on success.
@@ -31463,17 +31479,15 @@ my_wmemcmp(my_wc_t *a, my_wc_t *b, size_t len)
31463
31479
@retval ptr - contraction weight array
31464
31480
*/
31465
31481
31466
- static inline uint16 *
31482
+ static inline const uint16 *
31467
31483
my_uca_contraction_weight(const MY_CONTRACTIONS *list, my_wc_t *wc, size_t len)
31468
31484
{
31469
31485
MY_CONTRACTION *c, *last;
31470
31486
DBUG_ASSERT(len <= MY_UCA_MAX_CONTRACTION);
31471
31487
31472
31488
for (c= list->item, last= c + list->nitems; c < last; c++)
31473
31489
{
31474
- if ((len >= MY_UCA_MAX_CONTRACTION || c->ch[len] == 0) &&
31475
- !c->with_context &&
31476
- !my_wmemcmp(c->ch, wc, len))
31490
+ if (my_uca_true_contraction_eq(c, wc, len))
31477
31491
return c->weight;
31478
31492
}
31479
31493
return NULL;
@@ -31495,12 +31509,15 @@ my_uca_contraction_weight(const MY_CONTRACTIONS *list, my_wc_t *wc, size_t len)
31495
31509
@retval ptr - contraction weight array
31496
31510
*/
31497
31511
31498
- static uint16 *
31499
- my_uca_scanner_contraction_find(my_uca_scanner *scanner, my_wc_t *wc )
31512
+ static const uint16 *
31513
+ my_uca_scanner_contraction_find(my_uca_scanner *scanner, my_wc_t currwc )
31500
31514
{
31501
31515
size_t clen= 1;
31502
31516
int flag;
31503
31517
const uchar *s, *beg[MY_UCA_MAX_CONTRACTION];
31518
+ my_wc_t wc[MY_UCA_MAX_CONTRACTION];
31519
+ wc[0]= currwc;
31520
+
31504
31521
memset((void*) beg, 0, sizeof(beg));
31505
31522
31506
31523
/* Scan all contraction candidates */
@@ -31520,13 +31537,12 @@ my_uca_scanner_contraction_find(my_uca_scanner *scanner, my_wc_t *wc)
31520
31537
/* Find among candidates the longest real contraction */
31521
31538
for ( ; clen > 1; clen--)
31522
31539
{
31523
- uint16 *cweight;
31540
+ const uint16 *cweight;
31524
31541
if (my_uca_can_be_contraction_tail(&scanner->level->contractions,
31525
31542
wc[clen - 1]) &&
31526
31543
(cweight= my_uca_contraction_weight(&scanner->level->contractions,
31527
31544
wc, clen)))
31528
31545
{
31529
- scanner->wbeg= cweight + 1;
31530
31546
scanner->sbeg= beg[clen - 1];
31531
31547
return cweight;
31532
31548
}
@@ -31549,19 +31565,15 @@ my_uca_scanner_contraction_find(my_uca_scanner *scanner, my_wc_t *wc)
31549
31565
@retval ptr - contraction weight array
31550
31566
*/
31551
31567
31552
- static uint16 *
31553
- my_uca_previous_context_find(my_uca_scanner *scanner ,
31568
+ static const uint16 *
31569
+ my_uca_previous_context_find(const MY_CONTRACTIONS *list ,
31554
31570
my_wc_t wc0, my_wc_t wc1)
31555
31571
{
31556
- const MY_CONTRACTIONS *list= &scanner->level->contractions;
31557
31572
MY_CONTRACTION *c, *last;
31558
31573
for (c= list->item, last= c + list->nitems; c < last; c++)
31559
31574
{
31560
31575
if (c->with_context && wc0 == c->ch[0] && wc1 == c->ch[1])
31561
- {
31562
- scanner->wbeg= c->weight + 1;
31563
31576
return c->weight;
31564
- }
31565
31577
}
31566
31578
return NULL;
31567
31579
}
@@ -31584,10 +31596,11 @@ my_uca_previous_context_find(my_uca_scanner *scanner,
31584
31596
@retval NULL if no contextual weights could be found for currwc
31585
31597
@retval non null pointer to a zero-terminated weight string otherwise
31586
31598
*/
31587
- static inline uint16 *
31588
- my_uca_context_weight_find(my_uca_scanner *scanner, my_wc_t *wc )
31599
+ static inline const uint16 *
31600
+ my_uca_context_weight_find(my_uca_scanner *scanner, my_wc_t currwc )
31589
31601
{
31590
- uint16 *cweight;
31602
+ const uint16 *cweight;
31603
+ my_wc_t prevwc;
31591
31604
DBUG_ASSERT(scanner->level->contractions.nitems);
31592
31605
/*
31593
31606
If we have scanned a character which can have previous context,
@@ -31599,21 +31612,22 @@ my_uca_context_weight_find(my_uca_scanner *scanner, my_wc_t *wc)
31599
31612
context at the moment. CLDR does not have longer sequences.
31600
31613
*/
31601
31614
if (my_uca_can_be_previous_context_tail(&scanner->level->contractions,
31602
- wc[0] ) &&
31615
+ currwc ) &&
31603
31616
scanner->wbeg != nochar && /* if not the very first character */
31604
31617
my_uca_can_be_previous_context_head(&scanner->level->contractions,
31605
- (wc[1] = ((scanner->page << 8) +
31618
+ (prevwc = ((scanner->page << 8) +
31606
31619
scanner->code))) &&
31607
- (cweight= my_uca_previous_context_find(scanner, wc[1], wc[0])))
31620
+ (cweight= my_uca_previous_context_find(&scanner->level->contractions,
31621
+ prevwc, currwc)))
31608
31622
{
31609
31623
scanner->page= scanner->code= 0; /* Clear for the next character */
31610
31624
return cweight;
31611
31625
}
31612
31626
else if (my_uca_can_be_contraction_head(&scanner->level->contractions,
31613
- wc[0] ))
31627
+ currwc ))
31614
31628
{
31615
31629
/* Check if currwc starts a contraction */
31616
- if ((cweight= my_uca_scanner_contraction_find(scanner, wc )))
31630
+ if ((cweight= my_uca_scanner_contraction_find(scanner, currwc )))
31617
31631
return cweight;
31618
31632
}
31619
31633
return NULL;
0 commit comments