php5.6 的哈希表比较恶心, php7 也对哈希表进行了改造, 先介绍下 php5.6 的哈希表
原来大家都清楚, 我们看一下更细的一部分, 如何更新插入:
- static zend_never_inline zval **_get_zval_cv_lookup_BP_VAR_W(zval ***ptr, zend_uint var TSRMLS_DC)
- {
- zend_compiled_variable *cv = &CV_DEF_OF(var);
- if (!EG(active_symbol_table)) {
- Z_ADDREF(EG(uninitialized_zval));
- *ptr = (zval**)EX_CV_NUM(EG(current_execute_data), EG(active_op_array)->last_var + var);
- **ptr = &EG(uninitialized_zval);
- } else if (zend_hash_quick_find(EG(active_symbol_table), cv->name, cv->name_len+1, cv->hash_value, (void **)ptr)==FAILURE) {
- Z_ADDREF(EG(uninitialized_zval));
- zend_hash_quick_update(EG(active_symbol_table), cv->name, cv->name_len+1, cv->hash_value, &EG(uninitialized_zval_ptr), sizeof(zval *), (void **)ptr);
- }
- return *ptr;
- }
注意一下
ZEND_API int _zend_hash_quick_add_or_update(HashTable *ht, const char *arKey, uint nKeyLength, ulong h, void *pData, uint nDataSize, void **pDest, int flag ZEND_FILE_LINE_DC);
zend_hash_quick_update(EG(active_symbol_table), cv->name, cv->name_len+1, cv->hash_value, &EG(uninitialized_zval_ptr), sizeof(zval *), (void **)ptr);
首先要注意看的是 & EG(uninitialized_zval_ptr)是一个 **zval 结构指针, 把它存进去的目的是初始化, 也就是让 hash 表结构指针有所指向, 一个固定位置, 后续会根据 pDest 指针进行赋值.
其次是 pDest 是二级指针, 为什么会是二级指针, 因为 c 语言函数传递都是值传递, 要改变指针值只能将指针地址传入.
但是虽然是二级指针, 事实调用的却是用的三级真值 ***ptr, 那是因为符号表变量存的永远是 **zval 结构, 加上 ptr 变量地址, 所以三级就是这样来的.
- #define UPDATE_DATA(ht, p, pData, nDataSize) \
- if (nDataSize == sizeof(void*)) { \
- if ((p)->pData != &(p)->pDataPtr) { \
- pefree_rel((p)->pData, (ht)->persistent); \
- } \
- memcpy(&(p)->pDataPtr, pData, sizeof(void *)); \
- (p)->pData = &(p)->pDataPtr; \
- } else { \
- if ((p)->pData == &(p)->pDataPtr) { \
- (p)->pData = (void *) pemalloc_rel(nDataSize, (ht)->persistent); \
- (p)->pDataPtr=NULL; \
- } else { \
- (p)->pData = (void *) perealloc_rel((p)->pData, nDataSize, (ht)->persistent); \
- /* (p)->pDataPtr is already NULL so no need to initialize it */ \
- } \
- memcpy((p)->pData, pData, nDataSize); \
- }
开始把 memcpy 函数搞错了, 是指针指向的内容进行拷贝, 所以 p->pDataPtr 存放的是一级指针 * zval
具体看下面画的很难看的图
接下来是 php7 的数据结构:
- struct _zend_string {
- zend_refcounted_h gc;
- zend_ulong h; /* hash value */
- size_t len;
- char val[1];
- };
- typedef struct _Bucket {
- zval val;
- zend_ulong h; /* hash value (or numeric index) */
- zend_string *key; /* string key or NULL for numerics */
- } Bucket;
- typedef struct _zend_array HashTable;
- struct _zend_array {
- zend_refcounted_h gc;
- union {
- struct {
- ZEND_ENDIAN_LOHI_4(
- zend_uchar flags,
zend_uchar nApplyCount,
zend_uchar nIteratorsCount,
- zend_uchar reserve)
- } v;
- uint32_t flags;
- } u;
- uint32_t nTableMask;
- Bucket *arData;
- uint32_t nNumUsed;
- uint32_t nNumOfElements;
- uint32_t nTableSize;
- uint32_t nInternalPointer;
- zend_long nNextFreeElement;
- dtor_func_t pDestructor;
- };
- /*
- * HashTable Data Layout
- * =====================
- *
- * +=============================+
- * | HT_HASH(ht, ht->nTableMask) |
- * | ... |
- * | HT_HASH(ht, -1) |
- * +-----------------------------+
- * ht->arData ---> | Bucket[0] |
- * | ... |
- * | Bucket[ht->nTableSize-1] |
- * +=============================+
- */
ht->arData 仅是一级指针, 分为上下两部分, 上部分是索引根据 hash 值偏移获取下半部分的 index, 从而拿到内容
- <pre style="font-family: NSimSun; font-size: 16px; background: white;"><span style="color:blue;">#define</span> HT_HASH_EX(data, idx) \
- ((uint32_t*)(data))[(int32_t)(idx)]
根据 hash 值获取 int 类型的偏移量, 通常 idx 为负数见初始化:
- static void zend_always_inline zend_hash_real_init_ex(HashTable *ht, int packed)
- {
- HT_ASSERT(GC_REFCOUNT(ht) == 1);
- ZEND_ASSERT(!((ht)->u.flags & HASH_FLAG_INITIALIZED));
- if (packed) {
- HT_SET_DATA_ADDR(ht, pemalloc(HT_SIZE(ht), (ht)->u.flags & HASH_FLAG_PERSISTENT));
- (ht)->u.flags |= HASH_FLAG_INITIALIZED | HASH_FLAG_PACKED;
- HT_HASH_RESET_PACKED(ht);
- } else {
- (ht)->nTableMask = -(ht)->nTableSize;
- HT_SET_DATA_ADDR(ht, pemalloc(HT_SIZE(ht), (ht)->u.flags & HASH_FLAG_PERSISTENT));
- (ht)->u.flags |= HASH_FLAG_INITIALIZED;
- if (EXPECTED(ht->nTableMask == -8)) {
- Bucket *arData = ht->arData;
- HT_HASH_EX(arData, -8) = -1;
- HT_HASH_EX(arData, -7) = -1;
- HT_HASH_EX(arData, -6) = -1;
- HT_HASH_EX(arData, -5) = -1;
- HT_HASH_EX(arData, -4) = -1;
- HT_HASH_EX(arData, -3) = -1;
- HT_HASH_EX(arData, -2) = -1;
- HT_HASH_EX(arData, -1) = -1;
- } else {
- HT_HASH_RESET(ht);
- }
- }
- }
- # define HT_HASH_TO_BUCKET_EX(data, idx) \
- ((Bucket*)((char*)(data) + (idx)))
根据上面获取的偏移量获取 bucket 指针
可见增加元素的方法
- add_to_hash:
- HANDLE_BLOCK_INTERRUPTIONS();
- idx = ht->nNumUsed++;
- ht->nNumOfElements++;
- if (ht->nInternalPointer == HT_INVALID_IDX) {
- ht->nInternalPointer = idx;
- }
- zend_hash_iterators_update(ht, HT_INVALID_IDX, idx);
- p = ht->arData + idx;
- p->key = key;
- if (!ZSTR_IS_INTERNED(key)) {
- zend_string_addref(key);
- ht->u.flags &= ~HASH_FLAG_STATIC_KEYS;
- zend_string_hash_val(key);
- }
- p->h = h = ZSTR_H(key);
- ZVAL_COPY_VALUE(&p->val, pData);
- nIndex = h | ht->nTableMask;
- Z_NEXT(p->val) = HT_HASH(ht, nIndex);
- HT_HASH(ht, nIndex) = HT_IDX_TO_HASH(idx);
- HANDLE_UNBLOCK_INTERRUPTIONS();
首先 zval 元素的位置是顺序存储的, 就是 ht->arData 的下半部分, 然后通过 hash 算出 nIndex 位移即上部分, 首先将上部分的初始位置 (-1) 赋值给下半部分 zval 的 next 属性, 然后赋值为下部分的位置(1), 如此循环, 当加入一个新的 hash 相同的数据时, 此时将 1 赋值给新的 zval 的 next, 上部分的值为 2.
2 $var = 1
php7 解析过程 :
compile_file -> zendparse(yyparse) -> zend_compile_top_stmt -> zend_compile_stmt
-> zend_compile_assign -> zend_compile_simple_var -> zend_try_compile_cv -> zend_compile_simple_var_no_cv 获取 opcode
来源: https://blog.csdn.net/xiaolei1982/article/details/52472278