• Main Page
  • Related Pages
  • Classes
  • Files
  • File List
  • File Members

public/sm_trie_tpl.h

Go to the documentation of this file.
00001 /**
00002  * vim: set ts=4 :
00003  * =============================================================================
00004  * SourceMod
00005  * Copyright (C) 2004-2008 AlliedModders LLC.  All rights reserved.
00006  * =============================================================================
00007  *
00008  * This program is free software; you can redistribute it and/or modify it under
00009  * the terms of the GNU General Public License, version 3.0, as published by the
00010  * Free Software Foundation.
00011  * 
00012  * This program is distributed in the hope that it will be useful, but WITHOUT
00013  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
00014  * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
00015  * details.
00016  *
00017  * You should have received a copy of the GNU General Public License along with
00018  * this program.  If not, see <http://www.gnu.org/licenses/>.
00019  *
00020  * As a special exception, AlliedModders LLC gives you permission to link the
00021  * code of this program (as well as its derivative works) to "Half-Life 2," the
00022  * "Source Engine," the "SourcePawn JIT," and any Game MODs that run on software
00023  * by the Valve Corporation.  You must obey the GNU General Public License in
00024  * all respects for all other code used.  Additionally, AlliedModders LLC grants
00025  * this exception to all derivative works.  AlliedModders LLC defines further
00026  * exceptions, found in LICENSE.txt (as of this writing, version JULY-31-2007),
00027  * or <http://www.sourcemod.net/license.php>.
00028  *
00029  * Version: $Id$
00030  */
00031 
00032 #ifndef _INCLUDE_SOURCEMOD_TEMPLATED_TRIE_H_
00033 #define _INCLUDE_SOURCEMOD_TEMPLATED_TRIE_H_
00034 
00035 #include <new>
00036 #include <string.h>
00037 #include <stdlib.h>
00038 #include <assert.h>
00039 
00040 enum NodeType
00041 {
00042           Node_Unused = 0,              /* Node is not being used (sparse) */
00043           Node_Arc,                               /* Node is part of an arc and does not terminate */
00044           Node_Term,                                        /* Node is a terminator */
00045 };
00046 
00047 /**
00048  * @brief Trie class for storing key/value pairs, based on double array tries.
00049  * @file sm_trie_tpl.h
00050  *
00051  * For full works cited and implementation overview, there is a big comment 
00052  * block at the bottom of this file.
00053  */
00054 
00055 template <typename K>
00056 class KTrie
00057 {
00058           class KTrieNode;
00059 public:
00060           /**
00061            * @brief Clears all set objects in the trie.
00062            */
00063           void clear()
00064           {
00065                     run_destructors();
00066                     internal_clear();
00067           }
00068 
00069           /**
00070            * @brief Removes a key from the trie.
00071            *
00072            * @param key                 Key to remove.
00073            * @return                              True on success, false if key was never set.
00074            */
00075           bool remove(const char *key)
00076           {
00077                     KTrieNode *node = internal_retrieve(key);
00078                     if (!node || !node->valset)
00079                     {
00080                               return false;
00081                     }
00082 
00083                     node->value.~K();
00084                     node->valset = false;
00085 
00086                     m_numElements--;
00087 
00088                     return true;
00089           }
00090 
00091           /**
00092            * @brief Retrieves a pointer to the object stored at a given key.
00093            *
00094            * @param key                 Key to retrieve.
00095            * @return                              Pointer to object, or NULL if key was not found or not set.
00096            */
00097           K * retrieve(const char *key)
00098           {
00099                     KTrieNode *node = internal_retrieve(key);
00100                     if (!node || !node->valset)
00101                     {
00102                               return NULL;
00103                     }
00104                     return &node->value;
00105           }
00106 
00107           /**
00108            * @brief Inserts or updates the object stored at a key.
00109            *
00110            * @param key                 Key to update or insert.
00111            * @param obj                 Object to store at the key.
00112            * @return                              True on success, false on failure.
00113            */
00114           bool replace(const char *key, const K & obj)
00115           {
00116                     KTrieNode *prev_node = internal_retrieve(key);
00117                     if (!prev_node)
00118                     {
00119                               return insert(key, obj);
00120                     }
00121 
00122                     if (prev_node->valset)
00123                     {
00124                               prev_node->value.~K();
00125                     }
00126 
00127                     new (&prev_node->value) K(obj);
00128 
00129                     return true;
00130           }
00131 
00132           /**
00133            * @brief Inserts an object at a key.
00134            *
00135            * @param key                 Key to insert at.
00136            * @param obj                 Object to store at the key.
00137            * @return                              True on success, false if the key is already set or 
00138            *                                                insertion otherwise failed.
00139            */
00140           bool insert(const char *key, const K & obj)
00141           {
00142                     unsigned int lastidx = 1;               /* the last node index */
00143                     unsigned int curidx;                              /* current node index */
00144                     const char *keyptr = key;               /* input stream at current token */
00145                     KTrieNode *node = NULL;                           /* current node being processed */
00146                     KTrieNode *basenode = NULL;             /* current base node being processed */
00147                     unsigned int q;                                             /* temporary var for x_check results */
00148                     unsigned int curoffs;                             /* current offset */
00149 
00150                     /**
00151                      * Empty strings are a special case, since there are no productions.  We could 
00152                      * probably rework it to use BASE[0] but this hack is easier.
00153                      */
00154                     if (*key == '\0')
00155                     {
00156                               if (m_empty != NULL && m_empty->valset)
00157                               {
00158                                         return false;
00159                               }
00160 
00161                               if (m_empty == NULL)
00162                               {
00163                                         m_empty = (KTrieNode *)malloc(sizeof(KTrieNode));
00164                               }
00165 
00166                               m_empty->valset = true;
00167                               new (&m_empty->value) K(obj);
00168 
00169                               m_numElements++;
00170 
00171                               return true;
00172                     }
00173 
00174                     /* Start traversing at the root node (1) */
00175                     do
00176                     {
00177                               /* Find where the next character is, then advance */
00178                               curidx = m_base[lastidx].idx;
00179                               basenode = &m_base[curidx];
00180                               curoffs = charval(*keyptr);
00181                               curidx += curoffs;
00182                               node = &m_base[curidx];
00183                               keyptr++;
00184 
00185                               /* Check if this slot is supposed to be empty.  If so, we need to handle CASES 1/2:
00186                                * Insertion without collisions
00187                                */
00188                               if ( (curidx > m_baseSize) || (node->mode == Node_Unused) )
00189                               {
00190                                         if (curidx > m_baseSize)
00191                                         {
00192                                                   if (!grow())
00193                                                   {
00194                                                             return false;
00195                                                   }
00196                                                   node = &m_base[curidx];
00197                                         }
00198                                         node->parent = lastidx;
00199                                         if (*keyptr == '\0')
00200                                         {
00201                                                   node->mode = Node_Arc;
00202                                         }
00203                                         else
00204                                         {
00205                                                   node->idx = x_addstring(keyptr);
00206                                                   node->mode = Node_Term;
00207                                         }
00208                                         node->valset = true;
00209                                         new (&node->value) K(obj);
00210 
00211                                         m_numElements++;
00212 
00213                                         return true;
00214                               }
00215                               else if (node->parent != lastidx)
00216                               {
00217                                         /* Collision! We have to split up the tree here.  CASE 4:
00218                                          * Insertion when a new word is inserted with a collision.
00219                                          * NOTE: This is the hardest case to handle.  All below examples are based on:
00220                                          * BACHELOR, BADGE, inserting BABY.
00221                                          * The problematic production here is A -> B, where B is already being used.
00222                                    *
00223                                          * This process has to rotate one half of the 'A' arc.  We generate two lists:
00224                                          *  Outgoing Arcs - Anything leaving this 'A'
00225                                          *  Incoming Arcs - Anything going to this 'A'
00226                                          * Whichever list is smaller will be moved.  Note that this works because the intersection
00227                                          * affects both arc chains, and moving one will make the slot available to either.
00228                                          */
00229                                         KTrieNode *cur;
00230 
00231                                         /* Find every node arcing from the last node.
00232                                          * I.e. for BACHELOR, BADGE, BABY,
00233                                          * The arcs leaving A will be C and D, but our current node is B -> *.
00234                                          * Thus, we use the last index (A) to find the base for arcs leaving A.
00235                                          */
00236                                         unsigned int outgoing_base = m_base[lastidx].idx;
00237                                         unsigned int outgoing_list[256];
00238                                         unsigned int outgoing_count = 0;        /* count the current index here */
00239                                         cur = &m_base[outgoing_base] + 1;
00240                                         unsigned int outgoing_limit = 255;
00241 
00242                                         if (outgoing_base + outgoing_limit > m_baseSize)
00243                                         {
00244                                                   outgoing_limit = m_baseSize - outgoing_base;
00245                                         }
00246 
00247                                         for (unsigned int i=1; i<=outgoing_limit; i++,cur++)
00248                                         {
00249                                                   if (cur->mode == Node_Unused || cur->parent != lastidx)
00250                                                   {
00251                                                             continue;
00252                                                   }
00253                                                   outgoing_list[outgoing_count++] = i;
00254                                         }
00255                                         outgoing_list[outgoing_count++] = curidx - outgoing_base;
00256 
00257                                         /* Now we need to find all the arcs leaving our parent...
00258                                          * Note: the inconsistency is the base of our parent.  
00259                                          */
00260                                         assert(m_base[node->parent].mode == Node_Arc);
00261                                         unsigned int incoming_list[256];
00262                                         unsigned int incoming_base = m_base[node->parent].idx;
00263                                         unsigned int incoming_count = 0;
00264                                         unsigned int incoming_limit = 255;
00265                                         cur = &m_base[incoming_base] + 1;
00266 
00267                                         if (incoming_base + incoming_limit > m_baseSize)
00268                                         {
00269                                                   incoming_limit = m_baseSize - incoming_base;
00270                                         }
00271 
00272                                         assert(incoming_limit > 0 && incoming_limit <= 255);
00273 
00274                                         for (unsigned int i=1; i<=incoming_limit; i++,cur++)
00275                                         {
00276                                                   if (cur->mode == Node_Arc || cur->mode == Node_Term)
00277                                                   {
00278                                                             if (cur->parent == node->parent)
00279                                                             {
00280                                                                       incoming_list[incoming_count++] = i;
00281                                                             }
00282                                                   }
00283                                         }
00284 
00285                                         if (incoming_count < outgoing_count + 1)
00286                                         {
00287                                                   unsigned int q = x_check_multi(incoming_list, incoming_count);
00288 
00289                                                   node = &m_base[curidx];
00290 
00291                                                   /* If we're incoming, we need to modify our parent */
00292                                                   m_base[node->parent].idx = q;
00293 
00294                                                   /* For each node in the "to move" list,
00295                                                    * Relocate the node's info to the new position.
00296                                                    */
00297                                                   unsigned int idx, newidx, oldidx;
00298                                                   for (unsigned int i=0; i<incoming_count; i++)
00299                                                   {
00300                                                             idx = incoming_list[i];
00301                                                             newidx = q + idx;
00302                                                             oldidx = incoming_base + idx;
00303                                                             if (oldidx == lastidx)
00304                                                             {
00305                                                                       /* Important! Make sure we're not invalidating our sacred lastidx */
00306                                                                       lastidx = newidx;
00307                                                             }
00308                                                             /* Fully copy the node */
00309                                                             memcpy(&m_base[newidx], &m_base[oldidx], sizeof(KTrieNode));
00310                                                             if (m_base[oldidx].valset)
00311                                                             {
00312                                                                       new (&m_base[newidx].value) K(m_base[oldidx].value);
00313                                                                       m_base[oldidx].value.~K();
00314                                                             }
00315                                                             assert(m_base[m_base[newidx].parent].mode == Node_Arc);
00316                                                             /* Erase old data */
00317                                                             memset(&m_base[oldidx], 0, sizeof(KTrieNode));
00318                                                             /* If we are not a terminator, we have children we must take care of */
00319                                                             if (m_base[newidx].mode == Node_Arc)
00320                                                             {
00321                                                                       KTrieNode *check_base = &m_base[m_base[newidx].idx] + 1;
00322                                                                       outgoing_limit = (m_base + m_baseSize + 1) - check_base;
00323                                                                       if (outgoing_limit > 255)
00324                                                                       {
00325                                                                                 outgoing_limit = 255;
00326                                                                       }
00327                                                                       for (unsigned int j=1; j<=outgoing_limit; j++, check_base++)
00328                                                                       {
00329                                                                                 if (check_base->parent == oldidx)
00330                                                                                 {
00331                                                                                           check_base->parent = newidx;
00332                                                                                 }
00333                                                                       }
00334                                                             }
00335                                                   }
00336                                         }
00337                                         else
00338                                         {
00339                                                   unsigned int q = x_check_multi(outgoing_list, outgoing_count);
00340 
00341                                                   node = &m_base[curidx];
00342 
00343                                                   /* If we're outgoing, we need to modify our own base */
00344                                                   m_base[lastidx].idx = q;
00345 
00346                                                   /* Take the last index (curidx) out of the list.  Technically we are not moving this,
00347                                                    * since it's already being used by something else.  
00348                                                    */
00349                                                   outgoing_count--;
00350 
00351                                                   /* For each node in the "to move" list,
00352                                                    * Relocate the node's info to the new position.
00353                                                    */
00354                                                   unsigned int idx, newidx, oldidx;
00355                                                   for (unsigned int i=0; i<outgoing_count; i++)
00356                                                   {
00357                                                             idx = outgoing_list[i];
00358                                                             newidx = q + idx;
00359                                                             oldidx = outgoing_base + idx;
00360                                                             if (oldidx == lastidx)
00361                                                             {
00362                                                                       /* Important! Make sure we're not invalidating our sacred lastidx */
00363                                                                       lastidx = newidx;
00364                                                             }
00365                                                             /* Fully copy the node */
00366                                                             memcpy(&m_base[newidx], &m_base[oldidx], sizeof(KTrieNode));
00367                                                             if (m_base[oldidx].valset)
00368                                                             {
00369                                                                       new (&m_base[newidx].value) K(m_base[oldidx].value);
00370                                                                       m_base[oldidx].value.~K();
00371                                                             }
00372                                                             assert(m_base[m_base[newidx].parent].mode == Node_Arc);
00373                                                             /* Erase old data */
00374                                                             memset(&m_base[oldidx], 0, sizeof(KTrieNode));
00375                                                             /* If we are not a terminator, we have children we must take care of */
00376                                                             if (m_base[newidx].mode == Node_Arc)
00377                                                             {
00378                                                                       KTrieNode *check_base = &m_base[m_base[newidx].idx] + 1;
00379                                                                       outgoing_limit = (m_base + m_baseSize + 1) - check_base;
00380                                                                       if (outgoing_limit > 255)
00381                                                                       {
00382                                                                                 outgoing_limit = 255;
00383                                                                       }
00384                                                                       for (unsigned int j=1; j<=outgoing_limit; j++, check_base++)
00385                                                                       {
00386                                                                                 if (check_base->parent == oldidx)
00387                                                                                 {
00388                                                                                           check_base->parent = newidx;
00389                                                                                 }
00390                                                                       }
00391                                                             }
00392                                                   }
00393 
00394                                                   /* Take the invisible node and use it as our new node */
00395                                                   node = &m_base[q + outgoing_list[outgoing_count]];
00396                                         }
00397 
00398                                         /* We're finally done! */
00399                                         node->parent = lastidx;
00400                                         if (*keyptr == '\0')
00401                                         {
00402                                                   node->mode = Node_Arc;
00403                                         }
00404                                         else
00405                                         {
00406                                                   node->idx = x_addstring(keyptr);
00407                                                   node->mode = Node_Term;
00408                                         }
00409                                         node->valset = true;
00410                                         new (&node->value) K(obj);
00411 
00412                                         m_numElements++;
00413 
00414                                         return true;
00415                               }
00416                               else
00417                               {
00418                                         /* See what's in the next node - special case if terminator! */
00419                                         if (node->mode == Node_Term)
00420                                         {
00421                                                   /* If we're a terminator, we need to handle CASE 3:
00422                                                    * Insertion when a terminating collision occurs
00423                                                    */
00424                                                   char *term = &m_stringtab[node->idx];
00425                                                   /* Do an initial browsing to make sure they're not the same string */
00426                                                   if (strcmp(keyptr, term) == 0)
00427                                                   {
00428                                                             if (!node->valset)
00429                                                             {
00430                                                                       node->valset = true;
00431                                                                       new (&node->value) K(obj);
00432                                                                       m_numElements++;
00433                                                                       return true;
00434                                                             }
00435                                                             /* Same string.  We can't insert. */
00436                                                             return false;
00437                                                   }
00438                                                   /* For each matching character pair, we need to disband the terminator.
00439                                                    * This splits the similar prefix into a single arc path.
00440                                                    * First, save the old values so we can move them to a new node.
00441                                                    * Next, for each loop:
00442                                                    *  Take the current (invalid) node, and point it to the next arc base.
00443                                                    *  Set the current node to the node at the next arc.
00444                                                    */
00445                                                   K oldvalue;
00446                                                   bool oldvalset = node->valset;
00447                                                   if (oldvalset)
00448                                                   {
00449                                                             oldvalue = node->value;
00450                                                   }
00451                                                   if (*term == *keyptr)
00452                                                   {
00453                                                             while (*term == *keyptr)
00454                                                             {
00455                                                                       /* Find the next free slot in the check array.
00456                                                                        * This is the "vector base" essentially
00457                                                                        */
00458                                                                       q = x_check(*term);
00459                                                                       node = &m_base[curidx];
00460                                                                       /* Point the node to the next new base */
00461                                                                       node->idx = q;
00462                                                                       node->mode = Node_Arc;
00463                                                                       if (node->valset == true)
00464                                                                       {
00465                                                                                 node->value.~K();
00466                                                                                 node->valset = false;
00467                                                                       }
00468                                                                       /* Advance the input stream and local variables */
00469                                                                       lastidx = curidx;
00470                                                                       curidx = q + charval(*term);
00471                                                                       node = &m_base[curidx];
00472                                                                       /* Make sure the new current node has its parent set. */
00473                                                                       node->parent = lastidx;
00474                                                                       node->mode = Node_Arc;        /* Just in case we run x_check again */
00475                                                                       *term = '\0';       /* Unmark the string table here */
00476                                                                       term++;
00477                                                                       keyptr++;
00478                                                             }
00479                                                   }
00480                                                   else if (node->valset)
00481                                                   {
00482                                                             node->valset = false;
00483                                                             node->value.~K();
00484                                                   }
00485                                                   /* We're done inserting new pairs.  If one of them is exhausted,
00486                                                    * we take special shortcuts.
00487                                                    */
00488                                                   if (*term == '\0')                                //EX: BADGERHOUSE added over B -> ADGER.  
00489                                                   {
00490                                                             /* First backpatch the current node - it ends the newly split terminator.
00491                                                              * In the example, this would mean the node is the production from R -> ?
00492                                                              * This node ends the old BADGER, so we set it here.
00493                                                              */
00494                                                             node->valset = oldvalset;
00495                                                             if (node->valset)
00496                                                             {
00497                                                                       new (&node->value) K(oldvalue);
00498                                                             }
00499 
00500                                                             /* The terminator was split up, but pieces of keyptr remain.
00501                                                              * We need to generate a new production, in this example, R -> H,
00502                                                              * with H being a terminator to OUSE.  Thus we get:
00503                                                              * B,A,D,G,E,R*,H*->OUSE (* = value set).
00504                                                              * NOTE: parent was last set at the end of the while loop.
00505                                                              */
00506                                                             /* Get the new base and apply re-basing */
00507                                                             q = x_check(*keyptr);
00508                                                             node = &m_base[curidx];
00509 
00510                                                             node->idx = q;
00511                                                             node->mode = Node_Arc;
00512                                                             lastidx = curidx;
00513                                                             /* Finish the final node */
00514                                                             curidx = q + charval(*keyptr);
00515                                                             node = &m_base[curidx];
00516                                                             keyptr++;
00517                                                             /* Optimize - don't add to string table if there's nothing more to eat */
00518                                                             if (*keyptr == '\0')
00519                                                             {
00520                                                                       node->mode = Node_Arc;
00521                                                             }
00522                                                             else
00523                                                             {
00524                                                                       node->idx = x_addstring(keyptr);
00525                                                                       node->mode = Node_Term;
00526                                                             }
00527                                                             node->parent = lastidx;
00528                                                             node->valset = true;
00529                                                             new (&node->value) K(obj);
00530                                                   }
00531                                                   else if (*keyptr == '\0')
00532                                                   {         //EX: BADGER added over B -> ADGERHOUSE
00533                                                             /* First backpatch the current node - it ends newly split input string.
00534                                                              * This is the exact opposite of the above procedure.
00535                                                              */
00536                                                             node->valset = true;
00537                                                             new (&node->value) K(obj);
00538 
00539                                                             /* Get the new base and apply re-basing */
00540                                                             q = x_check(*term);
00541                                                             node = &m_base[curidx];
00542 
00543                                                             node->idx = q;
00544                                                             node->mode = Node_Arc;
00545                                                             lastidx = curidx;
00546                                                             /* Finish the final node */
00547                                                             curidx = q + charval(*term);
00548                                                             node = &m_base[curidx];
00549                                                             term++;
00550                                                             /* Optimize - don't add to string table if there's nothing more to eat */
00551                                                             if (*term == '\0')
00552                                                             {
00553                                                                       node->mode = Node_Arc;
00554                                                             }
00555                                                             else
00556                                                             {
00557                                                                       node->idx = (term - m_stringtab); /* Already in the string table! */
00558                                                                       node->mode = Node_Term;
00559                                                             }
00560                                                             node->parent = lastidx;
00561                                                             node->valset = oldvalset;
00562                                                             if (node->valset)
00563                                                             {
00564                                                                       new (&node->value) K(oldvalue);
00565                                                             }
00566                                                   }
00567                                                   else
00568                                                   {
00569                                                             /* Finally, we have to create two new nodes instead of just one. */
00570                                                             node->mode = Node_Arc;
00571 
00572                                                             /* Get the new base and apply re-basing */
00573                                                             q = x_check2(*keyptr, *term);
00574                                                             node = &m_base[curidx];
00575 
00576                                                             node->idx = q;
00577                                                             lastidx = curidx;
00578 
00579                                                             /* Re-create the old terminated node */
00580                                                             curidx = q + charval(*term);
00581                                                             node = &m_base[curidx];
00582                                                             term++;
00583                                                             node->valset = oldvalset;
00584                                                             if (node->valset)
00585                                                             {
00586                                                                       new (&node->value) K(oldvalue);
00587                                                             }
00588                                                             node->parent = lastidx;
00589                                                             if (*term == '\0')
00590                                                             {
00591                                                                       node->mode = Node_Arc;
00592                                                             }
00593                                                             else
00594                                                             {
00595                                                                       node->mode = Node_Term;
00596                                                                       node->idx = (term - m_stringtab); /* Already in the string table! */
00597                                                             }
00598 
00599                                                             /* Create the new keyed input node */
00600                                                             curidx = q + charval(*keyptr);
00601                                                             node = &m_base[curidx];
00602                                                             keyptr++;
00603                                                             node->valset = true;
00604                                                             new (&node->value) K(obj);
00605                                                             node->parent = lastidx;
00606                                                             if (*keyptr == '\0')
00607                                                             {
00608                                                                       node->mode = Node_Arc;
00609                                                             }
00610                                                             else
00611                                                             {
00612                                                                       node->mode = Node_Term;
00613                                                                       node->idx = x_addstring(keyptr);
00614                                                             }
00615                                                   }
00616 
00617                                                   m_numElements++;
00618 
00619                                                   /* Phew! */
00620                                                   return true;
00621                                         }
00622                                         else
00623                                         {
00624                                                   assert(node->mode == Node_Arc);
00625                                         }
00626                               }
00627                               lastidx = curidx;
00628                     } while (*keyptr != '\0');
00629 
00630                     assert(node);
00631 
00632                     /* If we've exhausted the string and we have a valid reached node,
00633                      * the production rule already existed.  Make sure it's valid to set first.
00634                      */
00635 
00636                     /* We have to be an Arc.  If the last result was anything else, we would have returned a new 
00637                      * production earlier.
00638                      */
00639                     assert(node->mode == Node_Arc);
00640 
00641                     if (!node->valset)
00642                     {
00643                               node->valset = true;
00644                               new (&node->value) K(obj);
00645                               m_numElements++;
00646                               return true;
00647                     }
00648 
00649                     return false;
00650           }
00651 
00652           /** 
00653            * @brief Iterates over the trie returning all known values.  
00654            * 
00655            * Note: This function is for debugging.  Do not use it as a 
00656            * production iterator since it's inefficient.  Iteration is 
00657            * guaranteed to be sorted ascendingly.
00658            *
00659            * The callback function takes:
00660            *  (KTrie)                             - Pointer to this Trie
00661            *  (const char *)  - String containing key name.
00662            *  (K &)                     - By-reference object at the key.
00663            *  (data)                              - User pointer.
00664            *
00665            * @param buffer                        Buffer to use as a key name cache.
00666            * @param maxlength                     Maximum length of the key name buffer.
00667            * @param data                                    User pointer for passing to the iterator.
00668            * @param func                                    Iterator callback function.
00669            */
00670           void bad_iterator(char *buffer, 
00671                     size_t maxlength, 
00672                     void *data,
00673                     void (*func)(KTrie *, const char *, K & obj, void *data))
00674           {
00675                     bad_iterator_r(buffer, maxlength, 0, data, func, 1);
00676           }
00677 
00678 private:
00679           void bad_iterator_r(char *buffer, 
00680                     size_t maxlength, 
00681                     size_t buf_pos,
00682                     void *data,
00683                     void (*func)(KTrie *, const char *, K & obj, void *data),
00684                     unsigned int root)
00685           {
00686                     char *term;
00687                     unsigned int idx, limit, start;
00688 
00689                     limit = 255;
00690                     start = m_base[root].idx;
00691 
00692                     /* Bound our limits */
00693                     if (start + limit > m_baseSize)
00694                     {
00695                               limit = m_baseSize - start;
00696                     }
00697 
00698                     /* Search for strings */
00699                     for (unsigned int i = 1; i <= limit; i++)
00700                     {
00701                               idx = start + i;
00702                               if (m_base[idx].mode == Node_Unused
00703                                         || m_base[idx].parent != root)
00704                               {
00705                                         continue;
00706                               }
00707 
00708                               if (m_base[idx].mode == Node_Arc)
00709                               {
00710                                         if (buf_pos < maxlength - 1)
00711                                         {
00712                                                   buffer[buf_pos++] = (char)i;
00713                                         }
00714 
00715                                         if (m_base[idx].valset)
00716                                         {
00717                                                   buffer[buf_pos] = '\0';
00718                                                   func(this, buffer, m_base[idx].value, data);
00719                                         }
00720 
00721                                         bad_iterator_r(buffer,
00722                                                   maxlength,
00723                                                   buf_pos,
00724                                                   data,
00725                                                   func,
00726                                                   idx);
00727 
00728                                         buf_pos--;
00729                               }
00730                               else if (m_base[idx].mode == Node_Term 
00731                                                    && m_base[idx].valset == true)
00732                               {
00733                                         size_t save_buf_pos;
00734 
00735                                         save_buf_pos = buf_pos;
00736                                         if (buf_pos < maxlength - 1)
00737                                         {
00738                                                   buffer[buf_pos++] = (char)i;
00739                                         }
00740                                         if (buf_pos < maxlength - 1)
00741                                         {
00742                                                   size_t destlen, j;
00743 
00744                                                   term = &m_stringtab[m_base[idx].idx];
00745                                                   destlen = strlen(term);
00746                                                   for (j = 0; 
00747                                                              j < destlen && j + buf_pos < maxlength - 1;
00748                                                              j++)
00749                                                   {
00750                                                             buffer[buf_pos + j] = term[j];
00751                                                   }
00752                                                   buf_pos += j;
00753                                         }
00754                                         buffer[buf_pos] = '\0';
00755 
00756                                         func(this, buffer, m_base[idx].value, data);
00757                                         
00758                                         buf_pos = save_buf_pos;
00759                               }
00760                     }
00761           }
00762 public:
00763           KTrie()
00764           {
00765                     m_base = (KTrieNode *)malloc(sizeof(KTrieNode) * (256 + 1));
00766                     m_stringtab = (char *)malloc(sizeof(char) * 256);
00767                     m_baseSize = 256;
00768                     m_stSize = 256;
00769                     m_empty = NULL;
00770                     m_numElements = 0;
00771 
00772                     internal_clear();
00773           }
00774           ~KTrie()
00775           {
00776                     if (m_empty != NULL && m_empty->valset)
00777                     {
00778                               m_empty->value.~K();
00779                               m_empty->valset = false;
00780                     }
00781                     free(m_empty);
00782                     run_destructors();
00783                     free(m_base);
00784                     free(m_stringtab);
00785           }
00786           void run_destructor(void (*dtor)(K * ptr))
00787           {
00788                     for (size_t i = 0; i <= m_baseSize; i++)
00789                     {
00790                               if (m_base[i].valset)
00791                               {
00792                                         dtor(&m_base[i].value);
00793                                         m_base[i].valset = false;
00794                               }
00795                     }
00796           }
00797 private:
00798           class KTrieNode
00799           {
00800                     friend class KTrie;
00801           private:
00802                     /**
00803                      * For Node_Arc, this index stores the 'base' offset to the next arc chain.
00804                      *   I.e. to jump from this arc to character C, it will be at base[idx+C].
00805                      * For Node_Term, this is an index into the string table.
00806                      */
00807                     unsigned int idx;   
00808 
00809                     /**
00810                      * This contains the prior arc that we must have come from.
00811                      * For example, if arc 63 has a base jump of index 12, and we want to see if
00812                      * there is a valid character C, the parent of 12+C must be 63.
00813                      */
00814                     unsigned int parent;
00815                     K value;                                /* Value associated with this node */
00816                     NodeType mode;                          /* Current usage type of the node */
00817                     bool valset;                            /* Whether or not a value is set */
00818           };
00819 private:
00820           KTrieNode *internal_retrieve(const char *key)
00821           {
00822                     unsigned int lastidx = 1;               /* the last node index */
00823                     unsigned int curidx;                              /* current node index */
00824                     const char *keyptr = key;               /* input stream at current token */
00825                     KTrieNode *node = NULL;                           /* current node being processed */
00826 
00827                     if (!*key)
00828                     {
00829                               return m_empty;
00830                     }
00831 
00832                     /* Start traversing at the root node */
00833                     do
00834                     {
00835                               /* Find where the next character is, then advance */
00836                               curidx = m_base[lastidx].idx;
00837                               node = &m_base[curidx];
00838                               curidx += charval(*keyptr);
00839                               node = &m_base[curidx];
00840                               keyptr++;
00841 
00842                               /* Check if this slot is supposed to be empty or is a collision */
00843                               if ((curidx > m_baseSize) || node->mode == Node_Unused || node->parent != lastidx)
00844                               {
00845                                         return NULL;
00846                               }
00847                               else if (node->mode == Node_Term)
00848                               {
00849                                         char *term = &m_stringtab[node->idx];
00850                                         if (strcmp(keyptr, term) == 0)
00851                                         {
00852                                                   break;
00853                                         }
00854                                         else
00855                                         {
00856                                                   return NULL;
00857                                         }
00858                               }
00859                               lastidx = curidx;
00860                     } while (*keyptr != '\0');
00861 
00862                     return node;
00863           }
00864           bool grow()
00865           {
00866                     /* The current # of nodes in the tree is trie->baseSize + 1 */
00867                     unsigned int cur_size = m_baseSize;
00868                     unsigned int new_size = cur_size * 2;
00869 
00870                     KTrieNode *new_base = (KTrieNode *)malloc((new_size + 1) * sizeof(KTrieNode));
00871                     if (!new_base)
00872                     {
00873                               return false;
00874                     }
00875 
00876                     memcpy(new_base, m_base, sizeof(KTrieNode) * (m_baseSize + 1));
00877                     memset(&new_base[cur_size + 1], 0, (new_size - cur_size) * sizeof(KTrieNode));
00878 
00879                     for (size_t i = 0; i <= m_baseSize; i++)
00880                     {
00881                               if (m_base[i].valset)
00882                               {
00883                                         /* Placement construct+copy the object, then placement destroy the old. */
00884                                         new (&new_base[i].value) K(m_base[i].value);
00885                                         m_base[i].value.~K();
00886                               }
00887                     }
00888 
00889                     free(m_base);
00890                     m_base = new_base;
00891                     m_baseSize = new_size;
00892 
00893                     return true;
00894           }
00895           inline unsigned char charval(char c)
00896           {
00897                     return (unsigned char)c;
00898           }
00899           void internal_clear()
00900           {
00901                     m_tail = 0;
00902                     m_numElements = 0;
00903 
00904                     memset(m_base, 0, sizeof(KTrieNode) * (m_baseSize + 1));
00905                     memset(m_stringtab, 0, sizeof(char) * m_stSize);
00906 
00907                     /* Sentinel root node */
00908                     m_base[1].idx = 1;
00909                     m_base[1].mode = Node_Arc;
00910                     m_base[1].parent = 1;
00911           }
00912           void run_destructors()
00913           {
00914                     for (size_t i = 0; i <= m_baseSize; i++)
00915                     {
00916                               if (m_base[i].valset)
00917                               {
00918                                         m_base[i].value.~K();
00919                               }
00920                     }
00921           }
00922           unsigned int x_addstring(const char *ptr)
00923           {
00924                     size_t len = strlen(ptr) + 1;
00925 
00926                     if (m_tail + len >= m_stSize)
00927                     {                   
00928                               while (m_tail + len >= m_stSize)
00929                               {
00930                                         m_stSize *= 2;
00931                               }
00932                               m_stringtab = (char *)realloc(m_stringtab,m_stSize);
00933                     }
00934 
00935                     unsigned int tail = m_tail;
00936                     strcpy(&m_stringtab[tail], ptr);
00937                     m_tail += len;
00938 
00939                     return tail;
00940           }
00941           unsigned int x_check(char c, unsigned int start=1)
00942           {
00943                     unsigned char _c = charval(c);
00944                     unsigned int to_check = m_baseSize - _c;
00945                     for (unsigned int i=start; i<=to_check; i++)
00946                     {
00947                               if (m_base[i+_c].mode == Node_Unused)
00948                               {
00949                                         return i;
00950                               }
00951                     }
00952 
00953                     grow();
00954 
00955                     return x_check(c, to_check+1);
00956           }
00957           unsigned int x_check2(char c1, char c2, unsigned int start=1)
00958           {
00959                     unsigned char _c1 = charval(c1);
00960                     unsigned char _c2 = charval(c2);
00961                     unsigned int to_check = m_baseSize - (_c1 > _c2 ? _c1 : _c2);
00962                     for (unsigned int i=start; i<=to_check; i++)
00963                     {
00964                               if (m_base[i+_c1].mode == Node_Unused
00965                                         && m_base[i+_c2].mode == Node_Unused)
00966                               {
00967                                         return i;
00968                               }
00969                     }
00970 
00971                     grow();
00972 
00973                     return x_check2(c1, c2, to_check+1);
00974           }
00975           unsigned int x_check_multi(
00976                     unsigned int offsets[], 
00977                     unsigned int count,
00978                     unsigned int start=1)
00979           {
00980                     KTrieNode *cur;
00981                     unsigned int to_check = m_baseSize;
00982                     unsigned int highest = 0;
00983 
00984                     for (unsigned int i=0; i<count; i++)
00985                     {
00986                               if (offsets[i] > highest)
00987                               {
00988                                         highest = offsets[i];
00989                               }
00990                     }
00991 
00992                     to_check -= highest;
00993 
00994                     for (unsigned int i=start; i<=to_check; i++)
00995                     {
00996                               bool okay = true;
00997                               for (unsigned int j=0; j<count; j++)
00998                               {
00999                                         cur = &m_base[i+offsets[j]];
01000                                         if (cur->mode != Node_Unused)
01001                                         {
01002                                                   okay = false;
01003                                                   break;
01004                                         }
01005                               }
01006                               if (okay)
01007                               {
01008                                         return i;
01009                               }
01010                     }
01011 
01012                     grow();
01013 
01014                     return x_check_multi(offsets, count, to_check+1);
01015           }
01016 public:
01017           size_t mem_usage()
01018           {
01019                     return (sizeof(KTrieNode) * (m_baseSize))
01020                               + m_stSize
01021                               + sizeof(KTrieNode);
01022           }
01023           size_t size()
01024           {
01025                     return m_numElements;
01026           }
01027 private:
01028           KTrieNode *m_base;                      /* Base array for the sparse tables */
01029           KTrieNode *m_empty;                     /* Special case for empty strings */
01030           char *m_stringtab;                      /* String table pointer */
01031           unsigned int m_baseSize;      /* Size of the base array, in members */
01032           unsigned int m_stSize;                  /* Size of the string table, in bytes */
01033           unsigned int m_tail;                    /* Current unused offset into the string table */
01034           size_t m_numElements;                   /* Number of elements in use */
01035 };
01036 
01037 /**
01038  * Double Array Trie algorithm, based on:
01039  * An Efficient Implementation of Trie Structures, by
01040  *  Jun-ichi Aoe and Katsushi Maromoto, and Takashi Sato
01041  * from Software - Practice and Experience, Vol. 22(9), 695-721 (September 1992)
01042  *
01043  *  A Trie is a simple data structure which stores strings as DFAs, with each 
01044  * transition state being a string entry.  For example, observe the following strings:
01045  *
01046  * BAILOPAN, BAT, BACON, BACK
01047  *  These transition as the follow production rules:
01048  *  B -> ...                  B
01049  *       A -> ...             BA
01050  *            I -> ...        BAI
01051  *                 LOPAN      BAILOPAN
01052  *            T -> ...        BAT
01053  *            C ->            BAC
01054  *                 O -> ...   BACO
01055  *                      N     BACON
01056  *                 K          BACK
01057  *
01058  *  The standard implementation for this - using lists - gives a slow linear lookup, somewhere between
01059  * O(N+M) or O(log n).  A faster implementation is proposed in the paper above, which is based on compacting
01060  * the transition states into two arrays.  In the paper's implementation, two arrays are used, and thus it is
01061  * called the "Double Array" algorithm.  However, the CHECK array's size is maintained the same as BASE, 
01062  * so they can be combined into one structure.  The array seems complex at first, but is very simple: it is a
01063  * tree structure flattened out into a single vector.  I am calling this implementation the Flat Array Trie.
01064  *
01065  *  BASE[] is an array where each member is a node in the Trie.  The node can either be UNUSED (empty), an ARC
01066  * (containing an offset to the next set of ARCs), or a TERMINATOR (contains the rest of a string).
01067  * Each node has an index which must be interpreted based on the node type.  If the node is a TERMINATOR, then the
01068  * index is an index into a string table, to find the rest of the string.  
01069  *  If the node is an ARC, the index is another index into BASE.  For each possible token that can follow the
01070  * current token, the value of those tokens can be added to the index given in the ARC.  Thus, given a current
01071  * position and the next desired token, the current arc will jump to another arc which can contain either:
01072  *   1) An invalid production (collision, no entry exists)
01073  *   2) An empty production (no entry exists)
01074  *   3) Another arc label (the string ends here or continues into more productions)
01075  *   4) A TERMINATOR (the string ends here and contains an unused set of productions)
01076  * 
01077  *  So, given current offset N (starting at N=1), jumping to token C means the next offset will be:
01078  *      offs = BASE[n] + C
01079  *  Thus, the next node will be at:
01080  *      BASE[BASE[n] + C]
01081  * 
01082  *  This allows each ARC to specify the base offset for any of its ARC children, like a tree.  Each node specifies
01083  * its parent ARC -- so if an invalid offset is specified, the parent will not match, and thus no such derived 
01084  * string exists.
01085  *
01086  *  This means that arrays can be laid out "sparsely," maximizing their usage.  Note that N need not be related to
01087  * the range of tokens (1-256).  I.e., a base index does not have to be at 1, 256, 512, et cetera.  This is because
01088  * insertion comes with a small deal of complexity.  To insert a new set of tokens T, the algorithm finds a new 
01089  * BASE index N such that BASE[N+T[i]] is unused for each T[i].  Thus, indirection is not necessarily linear; 
01090  * traversing a chain of ARC nodes can _and will_ jump around BASE.
01091  *
01092  *  Of course, given this level of flexibility in the array organization, there are collisions.  This is largely 
01093  * where insertions become slow, as the old chain must be relocated before the new one is used.  Relocation means 
01094  * finding one or more new base indexes, and this means traversing BASE until an acceptable index is found, such 
01095  * that each offset is unused (see description in previous paragraph).
01096  *
01097  *  However, it is not insertion time we are concerned about.  The "trie" name comes from reTRIEval.  We are only
01098  * concerned with lookup and deletion.  Both lookup and deletion are O(k), where k is relative to the length of the 
01099  * input string.  Note that it is best case O(1) and worst case O(k).  Deleting the entire trie is always O(1).
01100  */
01101 
01102 #endif //_INCLUDE_SOURCEMOD_TEMPLATED_TRIE_H_

Generated on Wed Dec 7 2011 18:50:03 for SourceMod SDK by  doxygen 1.7.1