1///
2/// \file
3/// Contains the C implementation of ANTLR3 bitsets as adapted from Terence Parr's
4/// Java implementation.
5///
6
7// [The "BSD licence"]
8// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
9// http://www.temporal-wave.com
10// http://www.linkedin.com/in/jimidle
11//
12// All rights reserved.
13//
14// Redistribution and use in source and binary forms, with or without
15// modification, are permitted provided that the following conditions
16// are met:
17// 1. Redistributions of source code must retain the above copyright
18//    notice, this list of conditions and the following disclaimer.
19// 2. Redistributions in binary form must reproduce the above copyright
20//    notice, this list of conditions and the following disclaimer in the
21//    documentation and/or other materials provided with the distribution.
22// 3. The name of the author may not be used to endorse or promote products
23//    derived from this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
26// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
27// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
28// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
29// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
30// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
31// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
32// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
33// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
34// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35
36#include    <antlr3bitset.h>
37
38// External interface
39//
40
41static	pANTLR3_BITSET  antlr3BitsetClone		(pANTLR3_BITSET inSet);
42static	pANTLR3_BITSET  antlr3BitsetOR			(pANTLR3_BITSET bitset1, pANTLR3_BITSET bitset2);
43static	void			antlr3BitsetORInPlace	(pANTLR3_BITSET bitset, pANTLR3_BITSET bitset2);
44static	ANTLR3_UINT32	antlr3BitsetSize		(pANTLR3_BITSET bitset);
45static	void			antlr3BitsetAdd			(pANTLR3_BITSET bitset, ANTLR3_INT32 bit);
46static	ANTLR3_BOOLEAN	antlr3BitsetEquals		(pANTLR3_BITSET bitset1, pANTLR3_BITSET bitset2);
47static	ANTLR3_BOOLEAN	antlr3BitsetMember		(pANTLR3_BITSET bitset, ANTLR3_UINT32 bit);
48static	ANTLR3_UINT32	antlr3BitsetNumBits		(pANTLR3_BITSET bitset);
49static	void			antlr3BitsetRemove		(pANTLR3_BITSET bitset, ANTLR3_UINT32 bit);
50static	ANTLR3_BOOLEAN	antlr3BitsetIsNil		(pANTLR3_BITSET bitset);
51static	pANTLR3_INT32	antlr3BitsetToIntList	(pANTLR3_BITSET bitset);
52
53// Local functions
54//
55static	void			growToInclude		(pANTLR3_BITSET bitset, ANTLR3_INT32 bit);
56static	void			grow				(pANTLR3_BITSET bitset, ANTLR3_INT32 newSize);
57static	ANTLR3_UINT64	bitMask				(ANTLR3_UINT32 bitNumber);
58static	ANTLR3_UINT32	numWordsToHold		(ANTLR3_UINT32 bit);
59static	ANTLR3_UINT32	wordNumber			(ANTLR3_UINT32 bit);
60static	void			antlr3BitsetFree	(pANTLR3_BITSET bitset);
61
62static void
63antlr3BitsetFree(pANTLR3_BITSET bitset)
64{
65    if	(bitset->blist.bits != NULL)
66    {
67		ANTLR3_FREE(bitset->blist.bits);
68		bitset->blist.bits = NULL;
69    }
70    ANTLR3_FREE(bitset);
71
72    return;
73}
74
75ANTLR3_API pANTLR3_BITSET
76antlr3BitsetNew(ANTLR3_UINT32 numBits)
77{
78	pANTLR3_BITSET  bitset;
79
80	ANTLR3_UINT32   numelements;
81
82	// Allocate memory for the bitset structure itself
83	//
84	bitset  = (pANTLR3_BITSET) ANTLR3_MALLOC((size_t)sizeof(ANTLR3_BITSET));
85
86	if	(bitset == NULL)
87	{
88		return	NULL;
89	}
90
91	// Avoid memory thrashing at the up front expense of a few bytes
92	//
93	if	(numBits < (8 * ANTLR3_BITSET_BITS))
94	{
95		numBits = 8 * ANTLR3_BITSET_BITS;
96	}
97
98	// No we need to allocate the memory for the number of bits asked for
99	// in multiples of ANTLR3_UINT64.
100	//
101	numelements	= ((numBits -1) >> ANTLR3_BITSET_LOG_BITS) + 1;
102
103	bitset->blist.bits    = (pANTLR3_BITWORD) ANTLR3_MALLOC((size_t)(numelements * sizeof(ANTLR3_BITWORD)));
104	memset(bitset->blist.bits, 0, (size_t)(numelements * sizeof(ANTLR3_BITWORD)));
105	bitset->blist.length  = numelements;
106
107	if	(bitset->blist.bits == NULL)
108	{
109		ANTLR3_FREE(bitset);
110		return	NULL;
111	}
112
113	antlr3BitsetSetAPI(bitset);
114
115
116	// All seems good
117	//
118	return  bitset;
119}
120
121ANTLR3_API void
122antlr3BitsetSetAPI(pANTLR3_BITSET bitset)
123{
124    bitset->clone		=    antlr3BitsetClone;
125    bitset->bor			=    antlr3BitsetOR;
126    bitset->borInPlace	=    antlr3BitsetORInPlace;
127    bitset->size		=    antlr3BitsetSize;
128    bitset->add			=    antlr3BitsetAdd;
129    bitset->grow		=    grow;
130    bitset->equals		=    antlr3BitsetEquals;
131    bitset->isMember	=    antlr3BitsetMember;
132    bitset->numBits		=    antlr3BitsetNumBits;
133    bitset->remove		=    antlr3BitsetRemove;
134    bitset->isNilNode		=    antlr3BitsetIsNil;
135    bitset->toIntList	=    antlr3BitsetToIntList;
136
137    bitset->free		=    antlr3BitsetFree;
138}
139
140ANTLR3_API pANTLR3_BITSET
141antlr3BitsetCopy(pANTLR3_BITSET_LIST blist)
142{
143    pANTLR3_BITSET  bitset;
144	int				numElements;
145
146    // Allocate memory for the bitset structure itself
147    //
148    bitset  = (pANTLR3_BITSET) ANTLR3_MALLOC((size_t)sizeof(ANTLR3_BITSET));
149
150    if	(bitset == NULL)
151    {
152		return	NULL;
153    }
154
155	numElements = blist->length;
156
157    // Avoid memory thrashing at the expense of a few more bytes
158    //
159    if	(numElements < 8)
160    {
161		numElements = 8;
162    }
163
164    // Install the length in ANTLR3_UINT64 units
165    //
166    bitset->blist.length  = numElements;
167
168    bitset->blist.bits    = (pANTLR3_BITWORD)ANTLR3_MALLOC((size_t)(numElements * sizeof(ANTLR3_BITWORD)));
169
170    if	(bitset->blist.bits == NULL)
171    {
172		ANTLR3_FREE(bitset);
173		return	NULL;
174    }
175
176	ANTLR3_MEMCPY(bitset->blist.bits, blist->bits, (ANTLR3_UINT64)(numElements * sizeof(ANTLR3_BITWORD)));
177
178    // All seems good
179    //
180    return  bitset;
181}
182
183static pANTLR3_BITSET
184antlr3BitsetClone(pANTLR3_BITSET inSet)
185{
186    pANTLR3_BITSET  bitset;
187
188    // Allocate memory for the bitset structure itself
189    //
190    bitset  = antlr3BitsetNew(ANTLR3_BITSET_BITS * inSet->blist.length);
191
192    if	(bitset == NULL)
193    {
194		return	NULL;
195    }
196
197    // Install the actual bits in the source set
198    //
199    ANTLR3_MEMCPY(bitset->blist.bits, inSet->blist.bits, (ANTLR3_UINT64)(inSet->blist.length * sizeof(ANTLR3_BITWORD)));
200
201    // All seems good
202    //
203    return  bitset;
204}
205
206
207ANTLR3_API pANTLR3_BITSET
208antlr3BitsetList(pANTLR3_HASH_TABLE list)
209{
210    pANTLR3_BITSET		bitSet;
211    pANTLR3_HASH_ENUM	en;
212    pANTLR3_HASH_KEY	key;
213    ANTLR3_UINT64		bit;
214
215    // We have no idea what exactly is in the list
216    // so create a default bitset and then just add stuff
217    // as we enumerate.
218    //
219    bitSet  = antlr3BitsetNew(0);
220
221    en		= antlr3EnumNew(list);
222
223    while   (en->next(en, &key, (void **)(&bit)) == ANTLR3_SUCCESS)
224    {
225		bitSet->add(bitSet, (ANTLR3_UINT32)bit);
226    }
227    en->free(en);
228
229    return NULL;
230}
231
232///
233/// \brief
234/// Creates a new bitset with at least one 64 bit bset of bits, but as
235/// many 64 bit sets as are required.
236///
237/// \param[in] bset
238/// A variable number of bits to add to the set, ending in -1 (impossible bit).
239///
240/// \returns
241/// A new bit set with all of the specified bitmaps in it and the API
242/// initialized.
243///
244/// Call as:
245///  - pANTLR3_BITSET = antlrBitsetLoad(bset, bset11, ..., -1);
246///  - pANTLR3_BITSET = antlrBitsetOf(-1);  Create empty bitset
247///
248/// \remarks
249/// Stdargs function - must supply -1 as last paremeter, which is NOT
250/// added to the set.
251///
252///
253ANTLR3_API pANTLR3_BITSET
254antlr3BitsetLoad(pANTLR3_BITSET_LIST inBits)
255{
256	pANTLR3_BITSET  bitset;
257	ANTLR3_UINT32  count;
258
259	// Allocate memory for the bitset structure itself
260	// the input parameter is the bit number (0 based)
261	// to include in the bitset, so we need at at least
262	// bit + 1 bits. If any arguments indicate a
263	// a bit higher than the default number of bits (0 means default size)
264	// then Add() will take care
265	// of it.
266	//
267	bitset  = antlr3BitsetNew(0);
268
269	if	(bitset == NULL)
270	{
271		return	NULL;
272	}
273
274	if	(inBits != NULL)
275	{
276		// Now we can add the element bits into the set
277		//
278		count=0;
279		while (count < inBits->length)
280		{
281			if  (bitset->blist.length <= count)
282			{
283				bitset->grow(bitset, count+1);
284			}
285
286			bitset->blist.bits[count] = *((inBits->bits)+count);
287			count++;
288		}
289	}
290
291	// return the new bitset
292	//
293	return  bitset;
294}
295
296///
297/// \brief
298/// Creates a new bitset with at least one element, but as
299/// many elements are required.
300///
301/// \param[in] bit
302/// A variable number of bits to add to the set, ending in -1 (impossible bit).
303///
304/// \returns
305/// A new bit set with all of the specified elements added into it.
306///
307/// Call as:
308///  - pANTLR3_BITSET = antlrBitsetOf(n, n1, n2, -1);
309///  - pANTLR3_BITSET = antlrBitsetOf(-1);  Create empty bitset
310///
311/// \remarks
312/// Stdargs function - must supply -1 as last paremeter, which is NOT
313/// added to the set.
314///
315///
316ANTLR3_API pANTLR3_BITSET
317antlr3BitsetOf(ANTLR3_INT32 bit, ...)
318{
319    pANTLR3_BITSET  bitset;
320
321    va_list ap;
322
323    // Allocate memory for the bitset structure itself
324    // the input parameter is the bit number (0 based)
325    // to include in the bitset, so we need at at least
326    // bit + 1 bits. If any arguments indicate a
327    // a bit higher than the default number of bits (0 menas default size)
328    // then Add() will take care
329    // of it.
330    //
331    bitset  = antlr3BitsetNew(0);
332
333    if	(bitset == NULL)
334    {
335		return	NULL;
336    }
337
338    // Now we can add the element bits into the set
339    //
340    va_start(ap, bit);
341    while   (bit != -1)
342    {
343		antlr3BitsetAdd(bitset, bit);
344		bit = va_arg(ap, ANTLR3_UINT32);
345    }
346    va_end(ap);
347
348    // return the new bitset
349    //
350    return  bitset;
351}
352
353static pANTLR3_BITSET
354antlr3BitsetOR(pANTLR3_BITSET bitset1, pANTLR3_BITSET bitset2)
355{
356    pANTLR3_BITSET  bitset;
357
358    if	(bitset1 == NULL)
359    {
360		return antlr3BitsetClone(bitset2);
361    }
362
363    if	(bitset2 == NULL)
364    {
365		return	antlr3BitsetClone(bitset1);
366    }
367
368    // Allocate memory for the newly ordered bitset structure itself.
369    //
370    bitset  = antlr3BitsetClone(bitset1);
371
372    antlr3BitsetORInPlace(bitset, bitset2);
373
374    return  bitset;
375
376}
377
378static void
379antlr3BitsetAdd(pANTLR3_BITSET bitset, ANTLR3_INT32 bit)
380{
381    ANTLR3_UINT32   word;
382
383    word    = wordNumber(bit);
384
385    if	(word	>= bitset->blist.length)
386    {
387		growToInclude(bitset, bit);
388    }
389
390    bitset->blist.bits[word] |= bitMask(bit);
391
392}
393
394static void
395grow(pANTLR3_BITSET bitset, ANTLR3_INT32 newSize)
396{
397    pANTLR3_BITWORD   newBits;
398
399    // Space for newly sized bitset - TODO: come back to this and use realloc?, it may
400    // be more efficient...
401    //
402    newBits = (pANTLR3_BITWORD) ANTLR3_CALLOC(1, (size_t)(newSize * sizeof(ANTLR3_BITWORD)));
403    if	(bitset->blist.bits != NULL)
404    {
405		// Copy existing bits
406		//
407		ANTLR3_MEMCPY((void *)newBits, (const void *)bitset->blist.bits, (size_t)(bitset->blist.length * sizeof(ANTLR3_BITWORD)));
408
409		// Out with the old bits... de de de derrr
410		//
411		ANTLR3_FREE(bitset->blist.bits);
412    }
413
414    // In with the new bits... keerrrang.
415    //
416    bitset->blist.bits      = newBits;
417    bitset->blist.length    = newSize;
418}
419
420static void
421growToInclude(pANTLR3_BITSET bitset, ANTLR3_INT32 bit)
422{
423	ANTLR3_UINT32	bl;
424	ANTLR3_UINT32	nw;
425
426	bl = (bitset->blist.length << 1);
427	nw = numWordsToHold(bit);
428
429	if	(bl > nw)
430	{
431		bitset->grow(bitset, bl);
432	}
433	else
434	{
435		bitset->grow(bitset, nw);
436	}
437}
438
439static void
440antlr3BitsetORInPlace(pANTLR3_BITSET bitset, pANTLR3_BITSET bitset2)
441{
442    ANTLR3_UINT32   minimum;
443    ANTLR3_UINT32   i;
444
445    if	(bitset2 == NULL)
446    {
447		return;
448    }
449
450
451    // First make sure that the target bitset is big enough
452    // for the new bits to be ored in.
453    //
454    if	(bitset->blist.length < bitset2->blist.length)
455    {
456		growToInclude(bitset, (bitset2->blist.length * sizeof(ANTLR3_BITWORD)));
457    }
458
459    // Or the miniimum number of bits after any resizing went on
460    //
461    if	(bitset->blist.length < bitset2->blist.length)
462	{
463		minimum = bitset->blist.length;
464	}
465	else
466	{
467		minimum = bitset2->blist.length;
468	}
469
470    for	(i = minimum; i > 0; i--)
471    {
472		bitset->blist.bits[i-1] |= bitset2->blist.bits[i-1];
473    }
474}
475
476static ANTLR3_UINT64
477bitMask(ANTLR3_UINT32 bitNumber)
478{
479    return  ((ANTLR3_UINT64)1) << (bitNumber & (ANTLR3_BITSET_MOD_MASK));
480}
481
482static ANTLR3_UINT32
483antlr3BitsetSize(pANTLR3_BITSET bitset)
484{
485    ANTLR3_UINT32   degree;
486    ANTLR3_INT32   i;
487    ANTLR3_INT8    bit;
488
489    // TODO: Come back to this, it may be faster to & with 0x01
490    // then shift right a copy of the 4 bits, than shift left a constant of 1.
491    // But then again, the optimizer might just work this out
492    // anyway.
493    //
494    degree  = 0;
495    for	(i = bitset->blist.length - 1; i>= 0; i--)
496    {
497		if  (bitset->blist.bits[i] != 0)
498		{
499			for	(bit = ANTLR3_BITSET_BITS - 1; bit >= 0; bit--)
500			{
501				if  ((bitset->blist.bits[i] & (((ANTLR3_BITWORD)1) << bit)) != 0)
502				{
503					degree++;
504				}
505			}
506		}
507    }
508    return degree;
509}
510
511static ANTLR3_BOOLEAN
512antlr3BitsetEquals(pANTLR3_BITSET bitset1, pANTLR3_BITSET bitset2)
513{
514    ANTLR3_INT32   minimum;
515    ANTLR3_INT32   i;
516
517    if	(bitset1 == NULL || bitset2 == NULL)
518    {
519	return	ANTLR3_FALSE;
520    }
521
522    // Work out the minimum comparison set
523    //
524    if	(bitset1->blist.length < bitset2->blist.length)
525    {
526		minimum = bitset1->blist.length;
527    }
528    else
529    {
530		minimum = bitset2->blist.length;
531    }
532
533    // Make sure explict in common bits are equal
534    //
535    for	(i = minimum - 1; i >=0 ; i--)
536    {
537		if  (bitset1->blist.bits[i] != bitset2->blist.bits[i])
538		{
539			return  ANTLR3_FALSE;
540		}
541    }
542
543    // Now make sure the bits of the larger set are all turned
544    // off.
545    //
546    if	(bitset1->blist.length > (ANTLR3_UINT32)minimum)
547    {
548		for (i = minimum ; (ANTLR3_UINT32)i < bitset1->blist.length; i++)
549		{
550			if	(bitset1->blist.bits[i] != 0)
551			{
552				return	ANTLR3_FALSE;
553			}
554		}
555    }
556    else if (bitset2->blist.length > (ANTLR3_UINT32)minimum)
557    {
558		for (i = minimum; (ANTLR3_UINT32)i < bitset2->blist.length; i++)
559		{
560			if	(bitset2->blist.bits[i] != 0)
561			{
562				return	ANTLR3_FALSE;
563			}
564		}
565    }
566
567    return  ANTLR3_TRUE;
568}
569
570static ANTLR3_BOOLEAN
571antlr3BitsetMember(pANTLR3_BITSET bitset, ANTLR3_UINT32 bit)
572{
573    ANTLR3_UINT32    wordNo;
574
575    wordNo  = wordNumber(bit);
576
577    if	(wordNo >= bitset->blist.length)
578    {
579		return	ANTLR3_FALSE;
580    }
581
582    if	((bitset->blist.bits[wordNo] & bitMask(bit)) == 0)
583    {
584		return	ANTLR3_FALSE;
585    }
586    else
587    {
588		return	ANTLR3_TRUE;
589    }
590}
591
592static void
593antlr3BitsetRemove(pANTLR3_BITSET bitset, ANTLR3_UINT32 bit)
594{
595    ANTLR3_UINT32    wordNo;
596
597    wordNo  = wordNumber(bit);
598
599    if	(wordNo < bitset->blist.length)
600    {
601		bitset->blist.bits[wordNo] &= ~(bitMask(bit));
602    }
603}
604static ANTLR3_BOOLEAN
605antlr3BitsetIsNil(pANTLR3_BITSET bitset)
606{
607   ANTLR3_INT32    i;
608
609   for	(i = bitset->blist.length -1; i>= 0; i--)
610   {
611       if   (bitset->blist.bits[i] != 0)
612       {
613			return ANTLR3_FALSE;
614       }
615   }
616
617   return   ANTLR3_TRUE;
618}
619
620static ANTLR3_UINT32
621numWordsToHold(ANTLR3_UINT32 bit)
622{
623    return  (bit >> ANTLR3_BITSET_LOG_BITS) + 1;
624}
625
626static	ANTLR3_UINT32
627wordNumber(ANTLR3_UINT32 bit)
628{
629    return  bit >> ANTLR3_BITSET_LOG_BITS;
630}
631
632static ANTLR3_UINT32
633antlr3BitsetNumBits(pANTLR3_BITSET bitset)
634{
635    return  bitset->blist.length << ANTLR3_BITSET_LOG_BITS;
636}
637
638/** Produce an integer list of all the bits that are turned on
639 *  in this bitset. Used for error processing in the main as the bitset
640 *  reresents a number of integer tokens which we use for follow sets
641 *  and so on.
642 *
643 *  The first entry is the number of elements following in the list.
644 */
645static	pANTLR3_INT32
646antlr3BitsetToIntList	(pANTLR3_BITSET bitset)
647{
648    ANTLR3_UINT32   numInts;	    // How many integers we will need
649    ANTLR3_UINT32   numBits;	    // How many bits are in the set
650    ANTLR3_UINT32   i;
651    ANTLR3_UINT32   index;
652
653    pANTLR3_INT32  intList;
654
655    numInts = bitset->size(bitset) + 1;
656    numBits = bitset->numBits(bitset);
657
658    intList = (pANTLR3_INT32)ANTLR3_MALLOC(numInts * sizeof(ANTLR3_INT32));
659
660    if	(intList == NULL)
661    {
662		return NULL;	// Out of memory
663    }
664
665    intList[0] = numInts;
666
667    // Enumerate the bits that are turned on
668    //
669    for	(i = 0, index = 1; i<numBits; i++)
670    {
671		if  (bitset->isMember(bitset, i) == ANTLR3_TRUE)
672		{
673			intList[index++]    = i;
674		}
675    }
676
677    // Result set
678    //
679    return  intList;
680}
681
682