forked from SwagSoftware/Kisak-Strike
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcglmtex.cpp
1942 lines (1545 loc) · 61.4 KB
/
cglmtex.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
//============ Copyright (c) Valve Corporation, All rights reserved. ============
//
// cglmtex.cpp
//
//===============================================================================
#include <vprof.h>
#include "togl/rendermechanism.h"
#include "tier0/icommandline.h"
#include "glmtexinlines.h"
// memdbgon -must- be the last include file in a .cpp file.
#include "tier0/memdbgon.h"
#if defined(OSX)
#include "appframework/ilaunchermgr.h"
extern ILauncherMgr *g_pLauncherMgr;
#endif
//===============================================================================
#if GLMDEBUG
// Debug builds only: head of the doubly linked list of CGLMTex objects.
// The CGLMTex constructor pushes every new texture onto the front of this list.
CGLMTex *g_pFirstCGMLTex;
#endif
// Set to 1 to printf the per-bucket byte accounting each time a non-renderable
// texture adds its backing storage to g_texGlobalBytes.
#define TEXSPACE_LOGGING 0
// sEncodeLayoutAsIndex() encodes a layout key as a 3-bit index whose bits mean:
// 4 : set if compressed
// 2 : set if any dimension is not a power of two
// 1 : set if mipmapped
// True when 'val' has at most one bit set.
// Clearing the lowest set bit of such a value leaves zero; note that zero
// itself also passes this test.
bool pwroftwo( int val )
{
	const int lowestBitCleared = val & ( val - 1 );
	return lowestBitCleared == 0;
}
// Classify a texture layout key into one of eight buckets (0..7).
// The result is a bitfield:
//   bit 0 (1) - the key has the kGLMTexMipped flag
//   bit 1 (2) - at least one of x/y/z size fails the pwroftwo() test
//   bit 2 (4) - the format's chunk size is greater than 1
int sEncodeLayoutAsIndex( GLMTexLayoutKey *key )
{
	const bool isMipped = ( key->m_texFlags & kGLMTexMipped ) != 0;

	const bool allPowerOfTwo = pwroftwo( key->m_xSize )
							&& pwroftwo( key->m_ySize )
							&& pwroftwo( key->m_zSize );

	const bool isChunked = GetFormatDesc( key->m_texFormat )->m_chunkSize > 1;

	int encoded = 0;
	encoded |= isMipped      ? 1 : 0;
	encoded |= allPowerOfTwo ? 0 : 2;
	encoded |= isChunked     ? 4 : 0;
	return encoded;
}
// Running byte totals of backing storage held by non-renderable textures,
// one bucket per index value produced by sEncodeLayoutAsIndex().
static unsigned long g_texGlobalBytes[8];
//===============================================================================
// Table of the D3D texture formats this file knows how to express in GL.
// GetFormatDesc() searches it linearly on the d3d-format column.
// Per the column legend below, each row gives a summary name, the D3D format, the GL
// internal format (plus its sRGB variant, 0 when there is none), the GL data format and
// data type, the chunk size (1 for per-texel formats, 4 for the DXT rows) and the number
// of bytes in one square chunk.
const GLMTexFormatDesc g_formatDescTable[] =
{
// not yet handled by this table:
// D3DFMT_INDEX16, D3DFMT_VERTEXDATA // D3DFMT_INDEX32,
// WTF { D3DFMT_R5G6R5 ???, GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5, 1, 2 },
// WTF { D3DFMT_A ???, GL_ALPHA8, GL_ALPHA, GL_UNSIGNED_BYTE, 1, 1 },
// ??? D3DFMT_V8U8,
// ??? D3DFMT_Q8W8V8U8,
// ??? D3DFMT_X8L8V8U8,
// ??? D3DFMT_R32F,
// ??? D3DFMT_D24X4S4 unsure how to handle or if it is ever used..
// ??? D3DFMT_D15S1 ever used ?
// ??? D3DFMT_D24X8 ever used?
// summ-name d3d-format gl-int-format gl-int-format-srgb gl-data-format gl-data-type chunksize, bytes-per-sqchunk
{ "_D16", D3DFMT_D16, GL_DEPTH_COMPONENT16, 0, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, 1, 2 },
{ "_D24X8", D3DFMT_D24X8, GL_DEPTH_COMPONENT24, 0, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT, 1, 4 }, // ??? unsure on this one
{ "_D24S8", D3DFMT_D24S8, GL_DEPTH24_STENCIL8_EXT, 0, GL_DEPTH_STENCIL_EXT, GL_UNSIGNED_INT_24_8_EXT, 1, 4 },
{ "_A8R8G8B8", D3DFMT_A8R8G8B8, GL_RGBA8, GL_SRGB8_ALPHA8_EXT, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, 1, 4 },
{ "_A4R4G4B4", D3DFMT_A4R4G4B4, GL_RGBA4, 0, GL_BGRA, GL_UNSIGNED_SHORT_4_4_4_4_REV, 1, 2 },
{ "_X8R8G8B8", D3DFMT_X8R8G8B8, GL_RGB8, GL_SRGB8_EXT, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, 1, 4 },
{ "_X1R5G5B5", D3DFMT_X1R5G5B5, GL_RGB5, 0, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV, 1, 2 },
{ "_A1R5G5B5", D3DFMT_A1R5G5B5, GL_RGB5_A1, 0, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV, 1, 2 },
{ "_L8", D3DFMT_L8, GL_LUMINANCE8, GL_SLUMINANCE8_EXT, GL_LUMINANCE, GL_UNSIGNED_BYTE, 1, 1 },
{ "_A8L8", D3DFMT_A8L8, GL_LUMINANCE8_ALPHA8, GL_SLUMINANCE8_ALPHA8_EXT, GL_LUMINANCE_ALPHA, GL_UNSIGNED_BYTE, 1, 2 },
{ "_DXT1", D3DFMT_DXT1, GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_COMPRESSED_SRGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_BYTE, 4, 8 },
{ "_DXT3", D3DFMT_DXT3, GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_BYTE, 4, 16 },
{ "_DXT5", D3DFMT_DXT5, GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_BYTE, 4, 16 },
{ "_A16B16G16R16F", D3DFMT_A16B16G16R16F, GL_RGBA16F_ARB, 0, GL_RGBA, GL_HALF_FLOAT_ARB, 1, 8 },
{ "_A16B16G16R16", D3DFMT_A16B16G16R16, GL_RGBA16, 0, GL_RGBA, GL_UNSIGNED_SHORT, 1, 8 }, // 16bpc integer tex
{ "_A32B32G32R32F", D3DFMT_A32B32G32R32F, GL_RGBA32F_ARB, 0, GL_RGBA, GL_FLOAT, 1, 16 },
{ "_R8G8B8", D3DFMT_R8G8B8, GL_RGB8, GL_SRGB8_EXT, GL_BGR, GL_UNSIGNED_BYTE, 1, 3 },
{ "_A8", D3DFMT_A8, GL_ALPHA8, 0, GL_ALPHA, GL_UNSIGNED_BYTE, 1, 1 },
{ "_R5G6B5", D3DFMT_R5G6B5, GL_RGB, GL_SRGB_EXT, GL_RGB, GL_UNSIGNED_SHORT_5_6_5, 1, 2 },
// fakey tex formats: the stated GL format and the memory layout may not agree (U8V8 for example)
// _Q8W8V8U8 we just pass through as RGBA bytes. Shader does scale/bias fix
{ "_Q8W8V8U8", D3DFMT_Q8W8V8U8, GL_RGBA8, 0, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, 1, 4 }, // straight ripoff of D3DFMT_A8R8G8B8
// U8V8 is exposed to the client as 2-bytes per texel, but we download it as 3-byte RGB.
// WriteTexels needs to do that conversion from rg8 to rgb8 in order to be able to download it correctly
{ "_V8U8", D3DFMT_V8U8, GL_RGB8, 0, GL_RG, GL_BYTE, 1, 2 },
{ "_R32F", D3DFMT_R32F, GL_R32F, GL_R32F, GL_RED, GL_FLOAT, 1, 4 },
{ "_A2R10G10B10", D3DFMT_A2R10G10B10, GL_RGB10_A2, GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_10_10_10_2, 1, 4 },
{ "_A2B10G10R10", D3DFMT_A2B10G10R10, GL_RGB10_A2, GL_RGB10_A2, GL_BGRA, GL_UNSIGNED_INT_10_10_10_2, 1, 4 },
/*
// NV shadow depth tex
D3DFMT_NV_INTZ = 0x5a544e49, // MAKEFOURCC('I','N','T','Z')
D3DFMT_NV_RAWZ = 0x5a574152, // MAKEFOURCC('R','A','W','Z')
// NV null tex
D3DFMT_NV_NULL = 0x4c4c554e, // MAKEFOURCC('N','U','L','L')
// ATI shadow depth tex
D3DFMT_ATI_D16 = 0x36314644, // MAKEFOURCC('D','F','1','6')
D3DFMT_ATI_D24S8 = 0x34324644, // MAKEFOURCC('D','F','2','4')
// ATI 1N and 2N compressed tex
D3DFMT_ATI_2N = 0x32495441, // MAKEFOURCC('A', 'T', 'I', '2')
D3DFMT_ATI_1N = 0x31495441, // MAKEFOURCC('A', 'T', 'I', '1')
*/
};
int g_formatDescTableCount = sizeof(g_formatDescTable) / sizeof( g_formatDescTable[0] );

// Find the g_formatDescTable row whose D3D format equals 'format'.
// The table is scanned front to back; returns NULL when no row matches.
const GLMTexFormatDesc *GetFormatDesc( D3DFORMAT format )
{
	const GLMTexFormatDesc *entry = g_formatDescTable;
	const GLMTexFormatDesc *const tableEnd = g_formatDescTable + g_formatDescTableCount;

	while ( entry < tableEnd )
	{
		if ( entry->m_d3dFormat == format )
		{
			return entry;
		}
		++entry;
	}

	return NULL;	// not found
}
//===============================================================================
// Append one normalized color component to a packed texel word.
//
//   value    - component intensity, nominally in [0,1]
//   width    - number of bits the component occupies (callers in this file pass 1..16)
//   valuebuf - accumulator; it is shifted left by 'width' bits and the quantized
//              component is OR'd into the vacated low bits, so the first component
//              inserted ends up in the most significant position.
//
// The component is quantized by truncating value * (2^width - 1).
// A result outside 0 .. 2^width-1 trips DebuggerBreak() and is then saturated, so it
// cannot spill into the bits of components that were inserted earlier. The range test
// is done on the float *before* it is converted, because converting an out-of-range
// (or NaN) float to an unsigned integer is undefined behavior.
void InsertTexelComponentFixed( float value, int width, unsigned long *valuebuf )
{
	const unsigned long range = 1UL << width;
	const float rangeF = (float)range;

	// same arithmetic as before: value scaled onto [0, range-1]
	float scaledF = ( value * rangeF ) * ( range - 1 ) / rangeF;

	// truncation maps the open interval (-1, range) onto 0..range-1;
	// anything else (NaN fails both comparisons) is out of range
	if ( !( ( scaledF > -1.0f ) && ( scaledF < rangeF ) ) )
	{
		DebuggerBreak();
		scaledF = ( scaledF >= rangeF ) ? (float)( range - 1 ) : 0.0f;
	}

	const unsigned long scaled = (unsigned long)scaledF;
	*valuebuf = ( *valuebuf << width ) | scaled;
}
// return true if successful
bool GLMGenTexels( GLMGenTexelParams *params )
{
unsigned char chunkbuf[256]; // can't think of any chunk this big..
const GLMTexFormatDesc *format = GetFormatDesc( params->m_format );
if (!format)
{
return FALSE; // fail
}
// this section just generates one square chunk in the desired format
unsigned long *temp32 = (unsigned long*)chunkbuf;
unsigned int chunksize = 0; // we can sanity check against the format table with this
switch( params->m_format )
{
// comment shows byte order in RAM
// lowercase is bit arrangement in a byte
case D3DFMT_A8R8G8B8: // B G R A
InsertTexelComponentFixed( params->a, 8, temp32 ); // A is inserted first and winds up at most significant bits after insertions follow
InsertTexelComponentFixed( params->r, 8, temp32 );
InsertTexelComponentFixed( params->g, 8, temp32 );
InsertTexelComponentFixed( params->b, 8, temp32 );
chunksize = 4;
break;
case D3DFMT_A4R4G4B4: // [ggggbbbb] [aaaarrrr] RA (nibbles)
InsertTexelComponentFixed( params->a, 4, temp32 );
InsertTexelComponentFixed( params->r, 4, temp32 );
InsertTexelComponentFixed( params->g, 4, temp32 );
InsertTexelComponentFixed( params->b, 4, temp32 );
chunksize = 2;
break;
case D3DFMT_X8R8G8B8: // B G R X
InsertTexelComponentFixed( 0.0, 8, temp32 );
InsertTexelComponentFixed( params->r, 8, temp32 );
InsertTexelComponentFixed( params->g, 8, temp32 );
InsertTexelComponentFixed( params->b, 8, temp32 );
chunksize = 4;
break;
case D3DFMT_X1R5G5B5: // [gggbbbbb] [xrrrrrgg]
InsertTexelComponentFixed( 0.0, 1, temp32 );
InsertTexelComponentFixed( params->r, 5, temp32 );
InsertTexelComponentFixed( params->g, 5, temp32 );
InsertTexelComponentFixed( params->b, 5, temp32 );
chunksize = 2;
break;
case D3DFMT_A1R5G5B5: // [gggbbbbb] [arrrrrgg]
InsertTexelComponentFixed( params->a, 1, temp32 );
InsertTexelComponentFixed( params->r, 5, temp32 );
InsertTexelComponentFixed( params->g, 5, temp32 );
InsertTexelComponentFixed( params->b, 5, temp32 );
chunksize = 2;
break;
case D3DFMT_L8: // L // caller, use R for L
InsertTexelComponentFixed( params->r, 8, temp32 );
chunksize = 1;
break;
case D3DFMT_A8L8: // L A // caller, use R for L and A for A
InsertTexelComponentFixed( params->a, 8, temp32 );
InsertTexelComponentFixed( params->r, 8, temp32 );
chunksize = 2;
break;
case D3DFMT_R8G8B8: // B G R
InsertTexelComponentFixed( params->r, 8, temp32 );
InsertTexelComponentFixed( params->g, 8, temp32 );
InsertTexelComponentFixed( params->b, 8, temp32 );
chunksize = 3;
break;
case D3DFMT_A8: // A
InsertTexelComponentFixed( params->a, 8, temp32 );
chunksize = 1;
break;
case D3DFMT_R5G6B5: // [gggbbbbb] [rrrrrggg]
InsertTexelComponentFixed( params->r, 5, temp32 );
InsertTexelComponentFixed( params->g, 6, temp32 );
InsertTexelComponentFixed( params->b, 5, temp32 );
chunksize = 2;
break;
case D3DFMT_DXT1:
{
memset( temp32, 0, 8 ); // zap 8 bytes
// two 565 RGB words followed by 32 bits of 2-bit interp values for a 4x4 block
// we write the same color to both slots and all zeroes for the mask (one color total)
unsigned long dxt1_color = 0;
// generate one such word and clone it
InsertTexelComponentFixed( params->r, 5, &dxt1_color );
InsertTexelComponentFixed( params->g, 6, &dxt1_color );
InsertTexelComponentFixed( params->b, 5, &dxt1_color );
// dupe
dxt1_color = dxt1_color | (dxt1_color<<16);
// write into chunkbuf
*(unsigned long*)&chunkbuf[0] = dxt1_color;
// color mask bits after that are already set to all zeroes. chunk is done.
chunksize = 8;
}
break;
case D3DFMT_DXT3:
{
memset( temp32, 0, 16 ); // zap 16 bytes
// eight bytes of alpha (16 4-bit alpha nibbles)
// followed by a DXT1 block
unsigned long dxt3_alpha = 0;
for( int i=0; i<8; i++)
{
// splat same alpha through block
InsertTexelComponentFixed( params->a, 4, &dxt3_alpha );
}
unsigned long dxt3_color = 0;
// generate one such word and clone it
InsertTexelComponentFixed( params->r, 5, &dxt3_color );
InsertTexelComponentFixed( params->g, 6, &dxt3_color );
InsertTexelComponentFixed( params->b, 5, &dxt3_color );
// dupe
dxt3_color = dxt3_color | (dxt3_color<<16);
// write into chunkbuf
*(unsigned long*)&chunkbuf[0] = dxt3_alpha;
*(unsigned long*)&chunkbuf[4] = dxt3_alpha;
*(unsigned long*)&chunkbuf[8] = dxt3_color;
*(unsigned long*)&chunkbuf[12] = dxt3_color;
chunksize = 16;
}
break;
case D3DFMT_DXT5:
{
memset( temp32, 0, 16 ); // zap 16 bytes
// DXT5 has 8 bytes of compressed alpha, then 8 bytes of compressed RGB like DXT1.
// the 8 alpha bytes are 2 bytes of endpoint alpha values, then 16x3 bits of interpolants.
// so to write a single alpha value, just figure out the value, store it in both the first two bytes then store zeroes.
InsertTexelComponentFixed( params->a, 8, (unsigned long*)&chunkbuf[0] );
InsertTexelComponentFixed( params->a, 8, (unsigned long*)&chunkbuf[0] );
// rest of the alpha mask was already zeroed.
// now do colors
unsigned long dxt5_color = 0;
// generate one such word and clone it
InsertTexelComponentFixed( params->r, 5, &dxt5_color );
InsertTexelComponentFixed( params->g, 6, &dxt5_color );
InsertTexelComponentFixed( params->b, 5, &dxt5_color );
// dupe
dxt5_color = dxt5_color | (dxt5_color<<16);
// write into chunkbuf
*(unsigned long*)&chunkbuf[8] = dxt5_color;
*(unsigned long*)&chunkbuf[12] = dxt5_color;
chunksize = 16;
}
break;
case D3DFMT_A32B32G32R32F:
{
*(float*)&chunkbuf[0] = params->r;
*(float*)&chunkbuf[4] = params->g;
*(float*)&chunkbuf[8] = params->b;
*(float*)&chunkbuf[12] = params->a;
chunksize = 16;
}
break;
case D3DFMT_A16B16G16R16:
memset( chunkbuf, 0, 8 );
// R and G wind up in the first 32 bits
// B and A wind up in the second 32 bits
InsertTexelComponentFixed( params->a, 16, (unsigned long*)&chunkbuf[4] ); // winds up as MSW of second word (note [4]) - thus last in RAM
InsertTexelComponentFixed( params->b, 16, (unsigned long*)&chunkbuf[4] );
InsertTexelComponentFixed( params->g, 16, (unsigned long*)&chunkbuf[0] );
InsertTexelComponentFixed( params->r, 16, (unsigned long*)&chunkbuf[0] ); // winds up as LSW of first word, thus first in RAM
chunksize = 8;
break;
// not done yet
//case D3DFMT_D16:
//case D3DFMT_D24X8:
//case D3DFMT_D24S8:
//case D3DFMT_A16B16G16R16F:
default:
return FALSE; // fail
break;
}
// once the chunk buffer is filled..
// sanity check the reported chunk size.
if (static_cast<int>(chunksize) != format->m_bytesPerSquareChunk)
{
DebuggerBreak();
return FALSE;
}
// verify that the amount you want to write will not exceed the limit byte count
unsigned long destByteCount = chunksize * params->m_chunkCount;
if (static_cast<int>(destByteCount) > params->m_byteCountLimit)
{
DebuggerBreak();
return FALSE;
}
// write the bytes.
unsigned char *destP = (unsigned char*)params->m_dest;
for( int chunk=0; chunk < params->m_chunkCount; chunk++)
{
for( uint byteindex = 0; byteindex < chunksize; byteindex++)
{
*destP++ = chunkbuf[byteindex];
}
}
params->m_bytesWritten = destP - (unsigned char*)params->m_dest;
return TRUE;
}
//===============================================================================
bool LessFunc_GLMTexLayoutKey( const GLMTexLayoutKey &a, const GLMTexLayoutKey &b )
{
#define DO_LESS(fff) if (a.fff != b.fff) { return (a.fff< b.fff); }
DO_LESS(m_texGLTarget);
DO_LESS(m_texFormat);
DO_LESS(m_texFlags);
DO_LESS(m_texSamples);
DO_LESS(m_xSize);
DO_LESS(m_ySize)
DO_LESS(m_zSize);
#undef DO_LESS
return false; // they are equal
}
// Construct an empty layout table.
// The only setup needed is to give the key->layout map its ordering function.
CGLMTexLayoutTable::CGLMTexLayoutTable()
{
m_layoutMap.SetLessFunc( LessFunc_GLMTexLayoutKey );
}
// Obtain a reference-counted layout matching the desired key.
//
// If the map already holds a layout for the key, its refcount is bumped and it is
// returned. Otherwise a new layout is built (mip/face/slice counts, per-slice
// dimensions and storage offsets, total storage size, summary string), inserted into
// the map with a refcount of 1, and returned.
//
// When GL_EXT_texture_sRGB_decode is available and the format has an sRGB internal
// format, the kGLMTexSRGB flag is forced on in a private copy of the key; the caller's
// key is never modified.
//
// Returns NULL (after GLMStop()) if the format is not in the format table or if the
// layout cannot be allocated.
GLMTexLayout *CGLMTexLayoutTable::NewLayoutRef( GLMTexLayoutKey *pDesiredKey )
{
	GLMTexLayoutKey tempKey;
	GLMTexLayoutKey *key = pDesiredKey;

	const GLMTexFormatDesc *formatDesc = GetFormatDesc( key->m_texFormat );
	if ( !formatDesc )
	{
		GLMStop();		// bad news
		return NULL;	// never fall through and dereference a NULL format
	}

	if ( gGL->m_bHave_GL_EXT_texture_sRGB_decode )
	{
		if ( ( formatDesc->m_glIntFormatSRGB != 0 ) && ( ( key->m_texFlags & kGLMTexSRGB ) == 0 ) )
		{
			tempKey = *pDesiredKey;
			key = &tempKey;

			// Slam on SRGB texture flag, and we'll use GL_EXT_texture_sRGB_decode to selectively turn it off in the samplers
			key->m_texFlags |= kGLMTexSRGB;
		}
	}

	// look up 'key' in the map and see if it's a hit, if so, bump the refcount and return
	unsigned short index = m_layoutMap.Find( *key );
	if ( index != m_layoutMap.InvalidIndex() )
	{
		GLMTexLayout *hit = m_layoutMap[ index ];
		hit->m_refCount ++;
		return hit;
	}

	// miss: need to make a new one.
	// to allocate it, we need to know how big to make it (slice count)

	// figure out how many mip levels are in play
	int mipCount = 1;
	if ( key->m_texFlags & kGLMTexMipped )
	{
		int largestAxis = key->m_xSize;

		if ( key->m_ySize > largestAxis )
			largestAxis = key->m_ySize;

		if ( key->m_zSize > largestAxis )
			largestAxis = key->m_zSize;

		mipCount = 0;
		while( largestAxis > 0 )
		{
			mipCount ++;
			largestAxis >>= 1;
		}
	}

	int faceCount = 1;
	if ( key->m_texGLTarget == GL_TEXTURE_CUBE_MAP )
	{
		faceCount = 6;
	}

	int sliceCount = mipCount * faceCount;

	if ( key->m_texFlags & kGLMTexMultisampled )
	{
		Assert( (key->m_texGLTarget == GL_TEXTURE_2D) );
		Assert( sliceCount == 1 );

		// assume non mipped
		Assert( (key->m_texFlags & kGLMTexMipped) == 0 );
		Assert( (key->m_texFlags & kGLMTexMippedAuto) == 0 );

		// assume renderable and srgb
		Assert( (key->m_texFlags & kGLMTexRenderable) !=0 );
		//Assert( (key->m_texFlags & kGLMTexSRGB) !=0 );				//FIXME don't assert on making depthstencil surfaces which are non srgb

		// double check sample count (FIXME need real limit check here against device/driver)
		Assert( (key->m_texSamples==2) || (key->m_texSamples==4) || (key->m_texSamples==6) || (key->m_texSamples==8) );
	}

	// now we know enough to allocate and populate the new tex layout.
	// the slice array trails the layout struct in the same allocation.
	int layoutSize = sizeof( GLMTexLayout ) + (sliceCount * sizeof( GLMTexLayoutSlice ));
	GLMTexLayout *layout = (GLMTexLayout *)malloc( layoutSize );
	if ( !layout )
	{
		GLMStop();		// out of memory
		return NULL;
	}
	memset( layout, 0, layoutSize );

	// clone the key in there
	layout->m_key = *key;

	// set refcount
	layout->m_refCount = 1;

	// save the format desc
	layout->m_format = (GLMTexFormatDesc *)formatDesc;

	// we know the mipcount from before
	layout->m_mipCount = mipCount;

	// we know the face count too
	layout->m_faceCount = faceCount;

	// slice count is the product
	layout->m_sliceCount = sliceCount;

	// we can now fill in the slices.
	GLMTexLayoutSlice *slicePtr = &layout->m_slices[0];
	int storageOffset = 0;
	const int chunkSize = formatDesc->m_chunkSize;

	for( int mip = 0; mip < mipCount; mip ++ )
	{
		for( int face = 0; face < faceCount; face++ )
		{
			// note application of chunk size which is 1 for uncompressed, and 4 for compressed tex (DXT)
			// note also that the *dimensions* must scale down to 1
			// but that the *storage* cannot go below 4x4.
			// we introduce the "storage sizes" which are clamped, to compute the storage footprint.
			int storage_x,storage_y,storage_z;

			slicePtr->m_xSize = layout->m_key.m_xSize >> mip;
			slicePtr->m_xSize = MAX( slicePtr->m_xSize, 1 );		// dimension can't go to zero
			storage_x = MAX( slicePtr->m_xSize, chunkSize );		// storage extent can't go below chunk size

			slicePtr->m_ySize = layout->m_key.m_ySize >> mip;
			slicePtr->m_ySize = MAX( slicePtr->m_ySize, 1 );		// dimension can't go to zero
			storage_y = MAX( slicePtr->m_ySize, chunkSize );		// storage extent can't go below chunk size

			slicePtr->m_zSize = layout->m_key.m_zSize >> mip;
			slicePtr->m_zSize = MAX( slicePtr->m_zSize, 1 );		// dimension can't go to zero
			storage_z = MAX( slicePtr->m_zSize, 1);					// storage extent for Z cannot go below '1'.

			// do not lie about the dimensionality for compressed formats, just fudge the storage:
			// round the chunk counts UP, so an extent that is not a multiple of the chunk size
			// (e.g. a 6x6 DXT mip) still gets storage for its partially covered chunks.
			// For chunk size 1, and for extents that are multiples of the chunk size, this is
			// the same value plain division gives.
			int xchunks = ( storage_x + chunkSize - 1 ) / chunkSize;
			int ychunks = ( storage_y + chunkSize - 1 ) / chunkSize;

			slicePtr->m_storageSize = (xchunks * ychunks * formatDesc->m_bytesPerSquareChunk) * storage_z;
			slicePtr->m_storageOffset = storageOffset;

			storageOffset += slicePtr->m_storageSize;
			storageOffset = ( (storageOffset+0x0F) & (~0x0F));	// keep each MIP starting on a 16 byte boundary.

			slicePtr++;
		}
	}

	layout->m_storageTotalSize = storageOffset;
	//printf("\n size %08x for key (x=%d y=%d z=%d, fmt=%08x, bpsc=%d)", layout->m_storageTotalSize, key->m_xSize, key->m_ySize, key->m_zSize, key->m_texFormat, formatDesc->m_bytesPerSquareChunk );

	// generate summary
	// "target, format, +/- mips, base size"
	char scratch[1024];

	const char *targetname = "?";
	switch( key->m_texGLTarget )
	{
		case GL_TEXTURE_2D:			targetname = "2D  ";	break;
		case GL_TEXTURE_3D:			targetname = "3D  ";	break;
		case GL_TEXTURE_CUBE_MAP:	targetname = "CUBE";	break;
	}

	sprintf( scratch, "[%s %s %dx%dx%d mips=%d slices=%d flags=%02lX%s]",
		targetname,
		formatDesc->m_formatSummary,
		layout->m_key.m_xSize, layout->m_key.m_ySize, layout->m_key.m_zSize,
		mipCount,
		sliceCount,
		layout->m_key.m_texFlags,
		(layout->m_key.m_texFlags & kGLMTexSRGB) ? " SRGB" : ""
	);
	layout->m_layoutSummary = strdup( scratch );
	//GLMPRINTF(("-D- new tex layout [ %s ]", scratch ));

	// then insert into map. disregard returned index.
	m_layoutMap.Insert( layout->m_key, layout );

	return layout;
}
// Release one reference to a layout.
// The layout is looked up in the map by its key and the refcount of the stored entry
// is decremented; nothing is freed here. A key that is not in the map is treated as
// an error and reported through GLMStop().
void CGLMTexLayoutTable::DelLayoutRef( GLMTexLayout *layout )
{
	const unsigned short slot = m_layoutMap.Find( layout->m_key );

	if ( slot == m_layoutMap.InvalidIndex() )
	{
		// that's bad
		GLMStop();
		return;
	}

	// found it - drop ref count on the entry the map holds
	GLMTexLayout *stored = m_layoutMap[ slot ];
	stored->m_refCount --;
	//assert( stored->m_refCount >= 0 );
}
void CGLMTexLayoutTable::DumpStats( )
{
for (uint i=0; i<m_layoutMap.Count(); i++ )
{
GLMTexLayout *layout = m_layoutMap[ i ];
// print it out
printf("\n%05d instances %08d bytes %08d totbytes %s", layout->m_refCount, layout->m_storageTotalSize, (layout->m_refCount*layout->m_storageTotalSize), layout->m_layoutSummary );
}
}
// Console variables consulted when textures are created.
// if nonzero, printf the internal format chosen for the renderbuffer of each MSAA texture
ConVar gl_texmsaalog ( "gl_texmsaalog", "0");
ConVar gl_rt_forcergba ( "gl_rt_forcergba", "1" ); // on teximage of a renderable tex, pass GL_RGBA in place of GL_BGRA
ConVar gl_minimize_rt_tex ( "gl_minimize_rt_tex", "0" ); // if 1, set the GL_TEXTURE_MINIMIZE_STORAGE_APPLE texture parameter to cut off mipmaps for RT's
ConVar gl_minimize_all_tex ( "gl_minimize_all_tex", "1" ); // if 1, set the GL_TEXTURE_MINIMIZE_STORAGE_APPLE texture parameter to cut off mipmaps for textures which are unmipped
ConVar gl_minimize_tex_log ( "gl_minimize_tex_log", "0" ); // if 1, printf the names of the tex that got minimized
// Construct a texture object for 'layout' on context 'ctx'.
//
//   ctx        - owning context; CheckCurrent() is called on it and it is remembered in m_ctx
//   layout     - layout describing target / format / size; remembered in m_layout
//   levels     - compared against the layout's mip count: when the texture is mipped and
//                'levels' equals that count, the initial push of blank slices is skipped
//   debugLabel - optional name; duplicated with strdup when non-NULL
//
// Steps, in order: (debug builds) link into the global texture list, record layout and
// sampler type, generate a GL texture name, create a multisample renderbuffer if the
// layout is multisampled, bind the new texture to TMU 0, allocate zero-filled backing
// storage for non-renderable textures, reset the per-slice flags, optionally set the
// minimize-storage texture parameter, optionally push every slice through WriteTexels,
// and finally rebind whatever texture was on TMU 0 before.
CGLMTex::CGLMTex( GLMContext *ctx, GLMTexLayout *layout, uint levels, const char *debugLabel )
{
#if GLMDEBUG
// push this texture onto the front of the global debug list
m_pPrevTex = NULL;
m_pNextTex = g_pFirstCGMLTex;
if ( m_pNextTex )
{
Assert( m_pNextTex->m_pPrevTex == NULL );
m_pNextTex->m_pPrevTex = this;
}
g_pFirstCGMLTex = this;
#endif
// caller has responsibility to make 'ctx' current, but we check to be sure.
ctx->CheckCurrent();
m_nLastResolvedBatchCounter = ctx->m_nBatchCounter;
// note layout requested
m_layout = layout;
m_texGLTarget = m_layout->m_key.m_texGLTarget;
// derive the sampler type from the GL target; any other target asserts
m_nSamplerType = SAMPLER_TYPE_UNUSED;
switch ( m_texGLTarget )
{
case GL_TEXTURE_CUBE_MAP: m_nSamplerType = SAMPLER_TYPE_CUBE; break;
case GL_TEXTURE_2D: m_nSamplerType = SAMPLER_TYPE_2D; break;
case GL_TEXTURE_3D: m_nSamplerType = SAMPLER_TYPE_3D; break;
default:
Assert( 0 );
break;
}
m_maxActiveMip = -1; //index of highest mip that has been written - increase as each mip arrives
m_minActiveMip = 999; //index of lowest mip that has been written - lower it as each mip arrives
// note context owner
m_ctx = ctx;
// clear the bind point flags
//m_bindPoints.ClearAll();
// clear the RT attach count
m_rtAttachCount = 0;
// come up with a GL name for this texture.
// for MTGL friendliness, we should generate our own names at some point..
gGL->glGenTextures( 1, &m_texName );
m_pBlitSrcFBO = NULL;
m_pBlitDstFBO = NULL;
// Sense whether to try and apply client storage upon teximage/subimage.
// This should only be true if we're running on OSX 10.6 or it was explicitly
// enabled with -gl_texclientstorage on the command line.
m_texClientStorage = ctx->m_bTexClientStorage;
// flag that we have not yet been explicitly kicked into VRAM..
m_texPreloaded = false;
// clone the debug label if there is one.
m_debugLabel = debugLabel ? strdup(debugLabel) : NULL;
// if tex is MSAA renderable, make an RBO, else zero the RBO name and dirty bit
if (layout->m_key.m_texFlags & kGLMTexMultisampled)
{
gGL->glGenRenderbuffersEXT( 1, &m_rboName );
// so we have enough info to go ahead and bind the RBO and put storage on it?
// try it.
gGL->glBindRenderbufferEXT( GL_RENDERBUFFER_EXT, m_rboName );
// quietly clamp if sample count exceeds known limit for the device
int sampleCount = layout->m_key.m_texSamples;
if (sampleCount > ctx->Caps().m_maxSamples)
{
sampleCount = ctx->Caps().m_maxSamples; // clamp
}
// pick the sRGB internal format when the layout carries the sRGB flag
GLenum msaaFormat = (layout->m_key.m_texFlags & kGLMTexSRGB) ? layout->m_format->m_glIntFormatSRGB : layout->m_format->m_glIntFormat;
gGL->glRenderbufferStorageMultisampleEXT( GL_RENDERBUFFER_EXT,
sampleCount, // not "layout->m_key.m_texSamples"
msaaFormat,
layout->m_key.m_xSize,
layout->m_key.m_ySize );
if (gl_texmsaalog.GetInt())
{
printf( "\n == MSAA Tex %p %s : MSAA RBO is intformat %s (%x)", this, m_debugLabel?m_debugLabel:"", GLMDecode( eGL_ENUM, msaaFormat ), msaaFormat );
}
gGL->glBindRenderbufferEXT( GL_RENDERBUFFER_EXT, 0 );
}
else
{
m_rboName = 0;
}
// at this point we have the complete description of the texture, and a name for it, but no data and no actual GL object.
// we know this name has never seen duty before, so we're going to hard-bind it to TMU 0, displacing any other tex that might have been bound there.
// any previously bound tex will be unbound and appropriately marked as a result.
// the active TMU will be set as a side effect.
// (the displaced texture is remembered here and rebound at the end of the constructor)
CGLMTex *pPrevTex = ctx->m_samplers[0].m_pBoundTex;
ctx->BindTexToTMU( this, 0 );
m_SamplingParams.SetToDefaults();
m_SamplingParams.SetToTarget( m_texGLTarget );
// OK, our texture now exists and is bound on the active TMU. Not drawable yet though.
// if not an RT, create backing storage and fill it
if ( !(layout->m_key.m_texFlags & kGLMTexRenderable) )
{
m_backing = (char *)malloc( m_layout->m_storageTotalSize );
memset( m_backing, 0, m_layout->m_storageTotalSize );
// track bytes allocated for non-RT's
int formindex = sEncodeLayoutAsIndex( &layout->m_key );
g_texGlobalBytes[ formindex ] += m_layout->m_storageTotalSize;
#if TEXSPACE_LOGGING
printf( "\n Tex %s added %d bytes in form %d which is now %d bytes", m_debugLabel ? m_debugLabel : "-", m_layout->m_storageTotalSize, formindex, g_texGlobalBytes[ formindex ] );
printf( "\n\t\t[ %d %d %d %d %d %d %d %d ]",
g_texGlobalBytes[ 0 ],g_texGlobalBytes[ 1 ],g_texGlobalBytes[ 2 ],g_texGlobalBytes[ 3 ],
g_texGlobalBytes[ 4 ],g_texGlobalBytes[ 5 ],g_texGlobalBytes[ 6 ],g_texGlobalBytes[ 7 ]
);
#endif
}
else
{
// renderable textures get no backing store, and client storage is turned off for them
m_backing = NULL;
m_texClientStorage = false;
}
// init lock count
// lock reqs are tracked by the owning context
m_lockCount = 0;
m_sliceFlags.SetCount( m_layout->m_sliceCount );
for( int i=0; i< m_layout->m_sliceCount; i++)
{
m_sliceFlags[i] = 0;
// kSliceValid = false (we have not teximaged each slice yet)
// kSliceStorageValid = false (the storage allocated does not reflect what is in the tex)
// kSliceLocked = false (the slices are not locked)
// kSliceFullyDirty = false (this does not come true til first lock)
}
// texture minimize parameter keeps driver from allocing mips when it should not, by being explicit about the ones that have no mips.
bool setMinimizeParameter = false;
bool minimize_rt = (gl_minimize_rt_tex.GetInt()!=0);
bool minimize_all = (gl_minimize_all_tex.GetInt()!=0);
if (layout->m_key.m_texFlags & kGLMTexRenderable)
{
// it's an RT. if mips were not explicitly requested, and "gl_minimize_rt_tex" is true, set the minimize parameter.
if ( (minimize_rt || minimize_all) && ( !(layout->m_key.m_texFlags & kGLMTexMipped) ) )
{
setMinimizeParameter = true;
}
}
else
{
// not an RT. if mips were not requested, and "gl_minimize_all_tex" is true, set the minimize parameter.
if ( minimize_all && ( !(layout->m_key.m_texFlags & kGLMTexMipped) ) )
{
setMinimizeParameter = true;
}
}
if (setMinimizeParameter)
{
if (gl_minimize_tex_log.GetInt())
{
printf("\n minimizing storage for tex '%s' [%s] ", m_debugLabel?m_debugLabel:"-", m_layout->m_layoutSummary );
}
// the parameter is only set when the APPLE texture range extension is present
if (gGL->m_bHave_GL_APPLE_texture_range)
gGL->glTexParameteri( m_layout->m_key.m_texGLTarget, GL_TEXTURE_MINIMIZE_STORAGE_APPLE, 1 );
}
// after a lot of pain with texture completeness...
// always push black into all slices of all newly created textures.
#if 0
bool pushRenderableSlices = (m_layout->m_key.m_texFlags & kGLMTexRenderable) != 0;
bool pushTexSlices = true; // just do it everywhere (m_layout->m_mipCount>1) && (m_layout->m_format->m_chunkSize !=1) ;
if (pushTexSlices)
{
// fill storage with mostly-opaque purple
GLMGenTexelParams genp;
memset( &genp, 0, sizeof(genp) );
genp.m_format = m_layout->m_format->m_d3dFormat;
const GLMTexFormatDesc *format = GetFormatDesc( genp.m_format );
genp.m_dest = m_backing; // dest addr
genp.m_chunkCount = m_layout->m_storageTotalSize / format->m_bytesPerSquareChunk; // fill the whole slab
genp.m_byteCountLimit = m_layout->m_storageTotalSize; // limit writes to this amount
genp.r = 1.0;
genp.g = 0.0;
genp.b = 1.0;
genp.a = 0.75;
GLMGenTexels( &genp );
}
#endif
//if (pushRenderableSlices || pushTexSlices)
// skip the initial push only when the texture is mipped and 'levels' equals the layout's mip count
if ( !( ( layout->m_key.m_texFlags & kGLMTexMipped ) && ( levels == m_layout->m_mipCount ) ) )
{
for( int face=0; face <m_layout->m_faceCount; face++)
{
for( int mip=0; mip <m_layout->m_mipCount; mip++)
{
// we're not really going to lock, we're just going to write the blank data from the backing store we just made
GLMTexLockDesc desc;
desc.m_req.m_tex = this;
desc.m_req.m_face = face;
desc.m_req.m_mip = mip;
desc.m_sliceIndex = CalcSliceIndex( face, mip );
GLMTexLayoutSlice *slice = &m_layout->m_slices[ desc.m_sliceIndex ];
// the region covers the whole slice
desc.m_req.m_region.xmin = desc.m_req.m_region.ymin = desc.m_req.m_region.zmin = 0;
desc.m_req.m_region.xmax = slice->m_xSize;
desc.m_req.m_region.ymax = slice->m_ySize;
desc.m_req.m_region.zmax = slice->m_zSize;
desc.m_sliceBaseOffset = slice->m_storageOffset; // doesn't really matter... we're just pushing zeroes..
desc.m_sliceRegionOffset = 0;
WriteTexels( &desc, true, (layout->m_key.m_texFlags & kGLMTexRenderable)!=0 ); // write whole slice - but disable data source if it's an RT, as there's no backing
}
}
}
GLMPRINTF(("-A- -**TEXNEW '%-60s' name=%06d size=%09d storage=%08x label=%s ", m_layout->m_layoutSummary, m_texName, m_layout->m_storageTotalSize, m_backing, m_debugLabel ? m_debugLabel : "-" ));
// put back whatever was bound to TMU 0 before this texture displaced it
ctx->BindTexToTMU( pPrevTex, 0 );
}
CGLMTex::~CGLMTex( )
{
#if GLMDEBUG
if ( m_pPrevTex )
{
Assert( m_pPrevTex->m_pNextTex == this );
m_pPrevTex->m_pNextTex = m_pNextTex;
}
else
{
Assert( g_pFirstCGMLTex == this );
g_pFirstCGMLTex = m_pNextTex;
}
if ( m_pNextTex )
{
Assert( m_pNextTex->m_pPrevTex == this );
m_pNextTex->m_pPrevTex = m_pPrevTex;
}
m_pNextTex = m_pPrevTex = NULL;
#endif
if ( !(m_layout->m_key.m_texFlags & kGLMTexRenderable) )
{
int formindex = sEncodeLayoutAsIndex( &m_layout->m_key );
g_texGlobalBytes[ formindex ] -= m_layout->m_storageTotalSize;
#if TEXSPACE_LOGGING
printf( "\n Tex %s freed %d bytes in form %d which is now %d bytes", m_debugLabel ? m_debugLabel : "-", m_layout->m_storageTotalSize, formindex, g_texGlobalBytes[ formindex ] );
printf( "\n\t\t[ %d %d %d %d %d %d %d %d ]",
g_texGlobalBytes[ 0 ],g_texGlobalBytes[ 1 ],g_texGlobalBytes[ 2 ],g_texGlobalBytes[ 3 ],
g_texGlobalBytes[ 4 ],g_texGlobalBytes[ 5 ],g_texGlobalBytes[ 6 ],g_texGlobalBytes[ 7 ]
);
#endif
}
GLMPRINTF(("-A- -**TEXDEL '%-60s' name=%06d size=%09d storage=%08x label=%s ", m_layout->m_layoutSummary, m_texName, m_layout->m_storageTotalSize, m_backing, m_debugLabel ? m_debugLabel : "-" ));
// check first to see if we were still bound anywhere or locked... these should be failures.
if ( m_pBlitSrcFBO )
{
m_ctx->DelFBO( m_pBlitSrcFBO );
m_pBlitSrcFBO = NULL;
}
if ( m_pBlitDstFBO )
{
m_ctx->DelFBO( m_pBlitDstFBO );
m_pBlitDstFBO = NULL;
}
if ( m_rboName )
{
gGL->glDeleteRenderbuffersEXT( 1, &m_rboName );
m_rboName = 0;
}
// if all that is OK, then delete the underlying tex
if ( m_texName )
{
gGL->glDeleteTextures( 1, &m_texName );
m_texName = 0;
}
// release our usage of the layout