Skip to content

Commit 118351e

Browse files
authored
Merge pull request #19 from cubiest/trim_left
Draft: Improve golang identifier generation: Trim leftmost digits
2 parents b805427 + 24c6518 commit 118351e

File tree

2 files changed

+157
-5
lines changed

2 files changed

+157
-5
lines changed

strmangle.go

Lines changed: 56 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,20 @@ var (
236236
titleCaseCache = map[string]string{}
237237
)
238238

239+
// TrimLeftDigits strips every leading digit from n and returns the remainder.
// It is meant for sanitizing a Go identifier, which must not start with a digit.
//
// Apply it after all other transformations, when producing the final
// identifier, or to the leftmost building block of an identifier.
//
// Digits are recognized with unicode.IsDigit, so non-ASCII decimal digits are
// trimmed as well. If n consists solely of digits, the empty string is returned.
func TrimLeftDigits(n string) string {
	return strings.TrimLeftFunc(n, unicode.IsDigit)
}
252+
239253
// sanitizeForIdentifier expects a string to be used to generate an identifier.
240254
// Accordingly, this func replaces any characters that would create an invalid identifier with '_'.
241255
func sanitizeForIdentifier(n string) string {
@@ -249,17 +263,31 @@ func sanitizeForIdentifier(n string) string {
249263
}
250264
cleanN += char
251265
}
252-
return cleanN
266+
return strings.TrimLeft(cleanN, "_") // Discard all leading '_'.
267+
}
268+
269+
// TitleCaseFull is like TitleCase, but additionally trims the leading digits of the string.
269+
//
270+
// Use this func only on complete identifier names, not on building blocks of an identifier name.
271+
func TitleCaseFull(n string) string {
272+
return titleCase(n, true)
253274
}
254275

255276
// TitleCase changes a snake_case variable name
256277
// into a Go-styled object variable name of "ColumnName".
257278
// titleCase also fully uppercases "ID" components of names, for example
258279
// "column_name_id" to "ColumnNameID".
259280
//
281+
// Use this func for e.g. building blocks of an identifier name; leading digits are kept.
282+
// If you are working with a complete name, use TitleCaseFull() instead.
283+
//
260284
// Note: The underlying implementation (titleCase) is ugly because it has been highly optimized,
261285
// we found that it was a fairly large bottleneck when we were using regexp.
262286
func TitleCase(n string) string {
287+
return titleCase(n, false)
288+
}
289+
290+
func titleCase(n string, trimLeftDigits bool) string {
263291
// Attempt to fetch from cache
264292
mut.RLock()
265293
val, ok := titleCaseCache[n]
@@ -269,6 +297,9 @@ func TitleCase(n string) string {
269297
}
270298

271299
cleanN := sanitizeForIdentifier(n)
300+
if trimLeftDigits {
301+
cleanN = TrimLeftDigits(cleanN)
302+
}
272303

273304
// If the string is made up of only uppercase letters and underscores,
274305
// then return as is and do not strip the underscores
@@ -305,6 +336,12 @@ func TitleCase(n string) string {
305336
}
306337

307338
word := name[start:end]
339+
340+
if trimLeftDigits {
341+
word = []byte(TrimLeftDigits(string(word)))
342+
trimLeftDigits = false
343+
}
344+
308345
wordLen := len(word)
309346
var vowels bool
310347

@@ -365,17 +402,34 @@ func Ignore(table, column string, ignoreList map[string]struct{}) bool {
365402
return false
366403
}
367404

405+
// CamelCaseFull is like CamelCase, but additionally trims the leading digits of the string.
406+
//
407+
// Use this func only on complete identifier names, not on building blocks of an identifier name.
408+
func CamelCaseFull(name string) string {
409+
return camelCase(name, true)
410+
}
411+
368412
// CamelCase takes a variable name in the format of "var_name" and converts
369413
// it into a Go-styled variable name of "varName".
370414
// camelCase also fully uppercases "ID" components of names, for example
371415
// "var_name_id" to "varNameID". It will also lowercase the first letter
372416
// of the name when it is fed something that starts with an uppercase letter.
417+
//
418+
// Use this func for e.g. building blocks of an identifier name; leading digits are kept.
419+
// If you are working with a complete name, use CamelCaseFull() instead.
373420
func CamelCase(name string) string {
421+
return camelCase(name, false)
422+
}
423+
424+
func camelCase(name string, trimLeftDigits bool) string {
374425
buf := GetBuffer()
375426
defer PutBuffer(buf)
376427

377428
name = sanitizeForIdentifier(name)
378-
name = strings.TrimLeft(name, "_") // Discard all leading '_'.
429+
if trimLeftDigits {
430+
name = TrimLeftDigits(name)
431+
name = strings.TrimLeft(name, "_") // Discard all leading '_' (again).
432+
}
379433
if name == "" {
380434
return ""
381435
}

strmangle_test.go

Lines changed: 101 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -176,9 +176,75 @@ func TestPlural(t *testing.T) {
176176
}
177177
}
178178

179-
func TestTitleCase(t *testing.T) {
179+
// TestTrimLeftDigits runs TrimLeftDigits over a table of inputs and reports
// every case whose output differs from the expected string.
func TestTrimLeftDigits(t *testing.T) {
180180
t.Parallel()
181181

182+
tests := []struct {
183+
In string
184+
Out string
185+
}{
186+
// Inputs that do not start with a digit must come back unchanged:
187+
{"hello_there", "hello_there"},
188+
{"", ""},
189+
{"_1", "_1"},
190+
{"im_a_teapot418", "im_a_teapot418"},
191+
// Inputs that start with digits lose exactly that leading run of digits:
192+
{"1", ""},
193+
{"42", ""},
194+
{"42a", "a"},
195+
{"42a7", "a7"},
196+
{"1_", "_"},
197+
{"418im_a_teapot", "im_a_teapot"},
198+
}
199+
200+
for i, test := range tests {
201+
if out := TrimLeftDigits(test.In); out != test.Out {
202+
t.Errorf("[%d] (%s) Out was wrong: %q, want: %q", i, test.In, out, test.Out)
203+
}
204+
}
205+
}
206+
207+
func clearTitleCaseCache() {
208+
mut.RLock()
209+
titleCaseCache = map[string]string{}
210+
mut.RUnlock()
211+
}
212+
213+
// TestTitleCaseFull runs TitleCaseFull over a table of inputs and reports
// every case whose output differs from the expected string.
func TestTitleCaseFull(t *testing.T) {
214+
// Reset the title-case cache when the test finishes; presumably needed because
// the cache is looked up by the input string alone — verify against titleCase.
defer clearTitleCaseCache()
215+
216+
tests := []struct {
217+
In string
218+
Out string
219+
}{
220+
// No changes:
221+
{"", ""},
222+
// Inputs made up only of digits and underscores are expected to yield "":
223+
{"_1", ""},
224+
{"1", ""},
225+
{"42", ""},
226+
{"1_", ""},
227+
// Title cased, with only the leading digits removed:
228+
{"42a", "A"},
229+
{"42a7", "A7"},
230+
{"_1a", "A"},
231+
{"hello_there", "HelloThere"},
232+
{"im_a_teapot418", "ImATeapot418"},
233+
{"im_418_years_old", "Im418YearsOld"},
234+
{"im_418years_old", "Im418yearsOld"},
235+
{"418im_a_teapot", "ImATeapot"},
236+
}
237+
238+
for i, test := range tests {
239+
if out := TitleCaseFull(test.In); out != test.Out {
240+
t.Errorf("[%d] (%s) Out was wrong: %q, want: %q", i, test.In, out, test.Out)
241+
}
242+
}
243+
}
244+
245+
func TestTitleCase(t *testing.T) {
246+
defer clearTitleCaseCache()
247+
182248
tests := []struct {
183249
In string
184250
Out string
@@ -222,8 +288,40 @@ func TestTitleCase(t *testing.T) {
222288
}
223289
}
224290

291+
// TestCamelCaseFull runs CamelCaseFull over a table of inputs and reports
// every case whose output differs from the expected string.
func TestCamelCaseFull(t *testing.T) {
292+
// Reset the title-case cache when the test finishes.
// NOTE(review): whether camelCase touches that cache is not visible here — confirm the cleanup is needed.
defer clearTitleCaseCache()
293+
294+
tests := []struct {
295+
In string
296+
Out string
297+
}{
298+
// No changes:
299+
{"", ""},
300+
// Inputs made up only of digits and underscores are expected to yield "":
301+
{"_1", ""},
302+
{"1", ""},
303+
{"42", ""},
304+
{"1_", ""},
305+
// Camel cased, with only the leading digits removed:
306+
{"42a", "a"},
307+
{"42a7", "a7"},
308+
{"_1a", "a"},
309+
{"hello_there", "helloThere"},
310+
{"im_a_teapot418", "imATeapot418"},
311+
{"im_418_years_old", "im418YearsOld"},
312+
{"im_418years_old", "im418yearsOld"},
313+
{"418im_a_teapot", "imATeapot"},
314+
}
315+
316+
for i, test := range tests {
317+
if out := CamelCaseFull(test.In); out != test.Out {
318+
t.Errorf("[%d] (%s) Out was wrong: %q, want: %q", i, test.In, out, test.Out)
319+
}
320+
}
321+
}
322+
225323
func TestCamelCase(t *testing.T) {
226-
t.Parallel()
324+
defer clearTitleCaseCache()
227325

228326
tests := []struct {
229327
In string
@@ -271,7 +369,7 @@ func TestCamelCase(t *testing.T) {
271369
}
272370

273371
func TestTitleCaseIdentifier(t *testing.T) {
274-
t.Parallel()
372+
defer clearTitleCaseCache()
275373

276374
tests := []struct {
277375
In string

0 commit comments

Comments
 (0)