redis源码学习-sds篇

sds介绍

redis的string类型采用自己实现的sds(Simple Dynamic String,也叫动态字符串)进行存储,文件放在/src/sds.c和/src/sds.h下。之所以采用sds而非c的string的原因有很多,归根结底都是为了保证使用的安全方便以及提高效率。

sds分析

sds在redis中的定义为:typedef char *sds;
sds自身并没有什么特殊的,只为一个指针,sds最终所指的是sdshdr中的buf,此后的着重分析都是sdshdr。
从源码来看,sdshdr数据结构为

/* Note: sdshdr5 is never used, we just access the flags byte directly.
 * However is here to document the layout of type 5 SDS strings. */
struct __attribute__ ((__packed__)) sdshdr5 {
    unsigned char flags; /* 3 lsb of type, and 5 msb of string length */
    char buf[];
};
struct __attribute__ ((__packed__)) sdshdr8 {
    uint8_t len; /* used */
    uint8_t alloc; /* excluding the header and null terminator */
    unsigned char flags; /* 3 lsb of type, 5 unused bits */
    char buf[];
};
struct __attribute__ ((__packed__)) sdshdr16 {
    uint16_t len; /* used */
    uint16_t alloc; /* excluding the header and null terminator */
    unsigned char flags; /* 3 lsb of type, 5 unused bits */
    char buf[];
};
struct __attribute__ ((__packed__)) sdshdr32 {
    uint32_t len; /* used */
    uint32_t alloc; /* excluding the header and null terminator */
    unsigned char flags; /* 3 lsb of type, 5 unused bits */
    char buf[];
};
struct __attribute__ ((__packed__)) sdshdr64 {
    uint64_t len; /* used */
    uint64_t alloc; /* excluding the header and null terminator */
    unsigned char flags; /* 3 lsb of type, 5 unused bits */
    char buf[];
};

从注释可以看到,sdshdr5并没有被真正使用到,此后只分析sdshdr8-sdshdr64的结构。
从定义上看,每个sdshdr结构体里四个字段。len代表着已经使用的字符串长度。alloc代表着在不包括SDS头部和结尾的NULL字符的情况下,sds能够存储的字符串的最大容量。flags代表着sds的类型,其中flags本身为八位,低三位才是代表着sds的类型,高五位未用,取值范围为0-4,但是因为sdshdr5未被使用,所以真实范围为1-4。buf为字符数组,真正用来存储字符串。
熟悉c的小伙伴们应该注意到了,sds定义时采用了__attribute__ ((packed))关键字,代表着sdshdr结构体是取消了字节对齐的,这样做三个好处,其一是节省内存,在相同业务下可以使得redis能存更多的业务数据。其二是动态更改结构体大小时非常方便。其三是加快效率,由于取消了字节对齐,那么sds[-1]指向的就是flags,在redis里能够大量的看到类似于char type,oldtype = s[-1]& SDS_TYPE_MASK;的操作,就是这样取得flags的。
作为测试也可以看到,

#include <stdio.h>
struct test1 {
    int i;
    char c;
};
struct __attribute__ ((__packed__)) test2 {
    int i;
    char c;
};
int main(int argc, const char * argv[]) {
    int test1size = sizeof(struct test1);
    int test2size = sizeof(struct test2);
    printf("size of test1:%d\n", test1size);
    printf("size of test2:%d\n", test2size);
    //printf(sizeof(struct test2));
    return 0;
}
结果为
size of test1:8
size of test2:5
Program ended with exit code: 0

sdshdr结构体分析完成后我们可以着重看下sds与c的string的区别与优劣了。
首先,sdshdr定义的len肯定就是用来减小时间复杂度的,如果说要获取字符串的长度,那么原生的时间复杂度为O(n),而采取了非内存对齐的sdshdr获取到len长度只需要定义好#define SDS_HDR(T,s) ((struct sdshdr##T *)((s)-(sizeof(struct sdshdr##T)))),根据上述讲到的获得flags的方法,SDS_HDR(type,s)->len即可知道len的大小,时间复杂度为O(1)
对于buf,buf数组尾部隐含有一个’\0’,但是sds判断是否到了结尾并不是以\0为标准的,而是以len字段。这样的话就算set了\0进来,redis也是会正确的将\0存下,实现了string二进制安全的效果
sds采取的是空间预分配策略,对于string的追加策略,原生的每追加一次都会产生一次realloc,而sds空间预分配时,只在buffer不够的情况下产生realloc。从现实上讲,redis在以空间换时间和以时间换空间上尽量的做到均衡,在牺牲了一定的空间下极大的提升了执行效率。提到了空间分配,内存回收也是避免不了的。sds为惰性的释放策略,当清空一个sds时,只是将sdshdr的len设置为0,当之后再次使用到该object时,可以少做一次内存分配策略,在执行效率上相比较原生string高了不少。
以上讲的都是sds的优势,总优势是建立在个性化的优化上,但是在真正使用时会发现,sds在使用上也是会有一定的限制。比如源码中的

sds op = sdsnewlen(additive ? "+@" : "-@", 2);
op = sdscat(op,ACLCommandCategories[best].name);

可以看到sdscat中的op既是参数,也是返回值。上述讲到的sds空间与分配策略中,buffer不够时会产生realloc,realloc时是重新malloc空间,然后把当前的sds全部拷贝过去。在调用sdscat时无法知道会不会出现realloc,因此如果我们不将分配之后的地址重新赋值给op,那么op的地址将会无效。假如后续redis真正的支持了多线程,那么现有的sds模型将会更加头疼sds的地址失效问题。
sds的文字分析到这里结束了,接下来为源码中的具体实现,从细节上看看redis到底是如何初始化以及销毁sds对象的
sdshdr类型有四种,使用时采用宏定义

#define SDS_TYPE_5  0
#define SDS_TYPE_8  1
#define SDS_TYPE_16 2
#define SDS_TYPE_32 3
#define SDS_TYPE_64 4
#define SDS_TYPE_MASK 7
#define SDS_TYPE_BITS 3

由于取消了内存对齐,那么在宏上提前定义好获取header的方法

#得到指向sdshdr的起始地址的指针
#define SDS_HDR_VAR(T,s) struct sdshdr##T *sh = (void*)((s)-(sizeof(struct sdshdr##T)));
#得到sdshdr的起始地址
#define SDS_HDR(T,s) ((struct sdshdr##T *)((s)-(sizeof(struct sdshdr##T))))
#define SDS_TYPE_5_LEN(f) ((f)>>SDS_TYPE_BITS)

O(1)获取字符串len的具体实现:

static inline size_t sdslen(const sds s) {
    unsigned char flags = s[-1];//内存非对齐后,buf[]的前一块即为flags
    switch(flags&SDS_TYPE_MASK) { //和flags的低3位相与,能够得到具体类型
        case SDS_TYPE_5:
            return SDS_TYPE_5_LEN(flags);
        case SDS_TYPE_8:
            return SDS_HDR(8,s)->len;//移动到sdshdr的起始,直接拿到的就是len的值
        case SDS_TYPE_16:
            return SDS_HDR(16,s)->len;
        case SDS_TYPE_32:
            return SDS_HDR(32,s)->len;
        case SDS_TYPE_64:
            return SDS_HDR(64,s)->len;
    }
    return 0;
}

获取剩余可用大小,setnewlen,inclen,获取alloc,setalloc与上述的sdslen实现基本一致,在此就不加注释了:

static inline size_t sdsavail(const sds s) {
    unsigned char flags = s[-1];
    switch(flags&SDS_TYPE_MASK) {
        case SDS_TYPE_5: {
            return 0;
        }
        case SDS_TYPE_8: {
            SDS_HDR_VAR(8,s);
            return sh->alloc - sh->len;
        }
        case SDS_TYPE_16: {
            SDS_HDR_VAR(16,s);
            return sh->alloc - sh->len;
        }
        case SDS_TYPE_32: {
            SDS_HDR_VAR(32,s);
            return sh->alloc - sh->len;
        }
        case SDS_TYPE_64: {
            SDS_HDR_VAR(64,s);
            return sh->alloc - sh->len;
        }
    }
    return 0;
}

static inline void sdssetlen(sds s, size_t newlen) {
    unsigned char flags = s[-1];
    switch(flags&SDS_TYPE_MASK) {
        case SDS_TYPE_5:
            {
                unsigned char *fp = ((unsigned char*)s)-1;
                *fp = SDS_TYPE_5 | (newlen << SDS_TYPE_BITS);
            }
            break;
        case SDS_TYPE_8:
            SDS_HDR(8,s)->len = newlen;
            break;
        case SDS_TYPE_16:
            SDS_HDR(16,s)->len = newlen;
            break;
        case SDS_TYPE_32:
            SDS_HDR(32,s)->len = newlen;
            break;
        case SDS_TYPE_64:
            SDS_HDR(64,s)->len = newlen;
            break;
    }
}

static inline void sdsinclen(sds s, size_t inc) {
    unsigned char flags = s[-1];
    switch(flags&SDS_TYPE_MASK) {
        case SDS_TYPE_5:
            {
                unsigned char *fp = ((unsigned char*)s)-1;
                unsigned char newlen = SDS_TYPE_5_LEN(flags)+inc;
                *fp = SDS_TYPE_5 | (newlen << SDS_TYPE_BITS);
            }
            break;
        case SDS_TYPE_8:
            SDS_HDR(8,s)->len += inc;
            break;
        case SDS_TYPE_16:
            SDS_HDR(16,s)->len += inc;
            break;
        case SDS_TYPE_32:
            SDS_HDR(32,s)->len += inc;
            break;
        case SDS_TYPE_64:
            SDS_HDR(64,s)->len += inc;
            break;
    }
}

/* sdsalloc() = sdsavail() + sdslen() */
static inline size_t sdsalloc(const sds s) {
    unsigned char flags = s[-1];
    switch(flags&SDS_TYPE_MASK) {
        case SDS_TYPE_5:
            return SDS_TYPE_5_LEN(flags);
        case SDS_TYPE_8:
            return SDS_HDR(8,s)->alloc;
        case SDS_TYPE_16:
            return SDS_HDR(16,s)->alloc;
        case SDS_TYPE_32:
            return SDS_HDR(32,s)->alloc;
        case SDS_TYPE_64:
            return SDS_HDR(64,s)->alloc;
    }
    return 0;
}

static inline void sdssetalloc(sds s, size_t newlen) {
    unsigned char flags = s[-1];
    switch(flags&SDS_TYPE_MASK) {
        case SDS_TYPE_5:
            /* Nothing to do, this type has no total allocation info. */
            break;
        case SDS_TYPE_8:
            SDS_HDR(8,s)->alloc = newlen;
            break;
        case SDS_TYPE_16:
            SDS_HDR(16,s)->alloc = newlen;
            break;
        case SDS_TYPE_32:
            SDS_HDR(32,s)->alloc = newlen;
            break;
        case SDS_TYPE_64:
            SDS_HDR(64,s)->alloc = newlen;
            break;
    }
}

sds的初始化操作:

/* Create a new sds string with the content specified by the 'init' pointer
 * and 'initlen'.
 * If NULL is used for 'init' the string is initialized with zero bytes.
 * If SDS_NOINIT is used, the buffer is left uninitialized;
 *
 * The string is always null-terminated (all the sds strings are, always) so
 * even if you create an sds string with:
 *
 * mystring = sdsnewlen("abc",3);
 *
 * You can print the string with printf() as there is an implicit \0 at the
 * end of the string. However the string is binary safe and can contain
 * \0 characters in the middle, as the length is stored in the sds header. */
sds _sdsnewlen(const void *init, size_t initlen, int trymalloc) {
    void *sh;
    sds s;
    //根据不同的长度选择不同的sdshdr类型(8 16 32 64),从这里可以看到redis自身是没有使用5的
    char type = sdsReqType(initlen);
    /* Empty strings are usually created in order to append. Use type 8
     * since type 5 is not good at this. */
    if (type == SDS_TYPE_5 && initlen == 0) type = SDS_TYPE_8;
    int hdrlen = sdsHdrSize(type);
    unsigned char *fp; /* flags pointer. */
    //usable定义在zmalloc.c中,'*usable' is set to the usable size if non NULL.
    size_t usable;
    
    //这里在为sdshdr分配内存时,hdrlen为头部大小(len,alloc,flags的大小),hdrlen为sds的大小,也为buf的大小,1为末尾隐含的空字符大小
    assert(initlen + hdrlen + 1 > initlen); /* Catch size_t overflow */    
    sh = trymalloc?
        s_trymalloc_usable(hdrlen+initlen+1, &usable) :
        s_malloc_usable(hdrlen+initlen+1, &usable);
    if (sh == NULL) return NULL;
    if (init==SDS_NOINIT)
        init = NULL;
    else if (!init)
        memset(sh, 0, hdrlen+initlen+1); // 内存初始化为0
    //s为buf数组的起始位置
    s = (char*)sh+hdrlen;
    //-1和以上sds[-1]等价,获得flags的位置
    fp = ((unsigned char*)s)-1;
    //非空的话为可用大小
    usable = usable-hdrlen-1;
    if (usable > sdsTypeMaxSize(type))
        usable = sdsTypeMaxSize(type);
    // 初始化sdshdr中的len,alloc,flags字段
    switch(type) {
        case SDS_TYPE_5: {
            *fp = type | (initlen << SDS_TYPE_BITS);
            break;
        }
        case SDS_TYPE_8: {
            SDS_HDR_VAR(8,s);
            sh->len = initlen;
            sh->alloc = usable;
            *fp = type;
            break;
        }
        case SDS_TYPE_16: {
            SDS_HDR_VAR(16,s);
            sh->len = initlen;
            sh->alloc = usable;
            *fp = type;
            break;
        }
        case SDS_TYPE_32: {
            SDS_HDR_VAR(32,s);
            sh->len = initlen;
            sh->alloc = usable;
            *fp = type;
            break;
        }
        case SDS_TYPE_64: {
            SDS_HDR_VAR(64,s);
            sh->len = initlen;
            sh->alloc = usable;
            *fp = type;
            break;
        }
    }
    //初始化sdshdr的buf数组,即sds的初始化
    if (initlen && init)
        memcpy(s, init, initlen);
    s[initlen] = '\0';
    return s;
}

sds空间预分配:

/* Enlarge the free space at the end of the sds string so that the caller
 * is sure that after calling this function can overwrite up to addlen
 * bytes after the end of the string, plus one more byte for nul term.
 * If there's already sufficient free space, this function returns without any
 * action, if there isn't sufficient free space, it'll allocate what's missing,
 * and possibly more:
 * When greedy is 1, enlarge more than needed, to avoid need for future reallocs
 * on incremental growth.
 * When greedy is 0, enlarge just enough so that there's free space for 'addlen'.
 *
 * Note: this does not change the *length* of the sds string as returned
 * by sdslen(), but only the free buffer space we have. */
sds _sdsMakeRoomFor(sds s, size_t addlen, int greedy) {
    void *sh, *newsh;
    size_t avail = sdsavail(s);
    size_t len, newlen;
    char type, oldtype = s[-1] & SDS_TYPE_MASK;
    int hdrlen;
    size_t usable;

    /* Return ASAP if there is enough space left. */
    //如果剩余空间足够的话,则不做realloc操作直接返回
    if (avail >= addlen) return s;

    len = sdslen(s);
    sh = (char*)s-sdsHdrSize(oldtype);
    //如果剩余空间不足,则重新给出新的len
    newlen = (len+addlen);
    assert(newlen > len);   /* Catch size_t overflow */
    //#define SDS_MAX_PREALLOC (1024*1024)  SDS_MAX_PREALLOC的大小为1M
    if (greedy == 1) {
        //如果新的长度小于1m的话,则二倍扩容
        if (newlen < SDS_MAX_PREALLOC)
            newlen *= 2;
        else
            //如果新的长度大于1m的话,则每次只新加1m(插句题外话,对于redis,1m以上的value都是大key了(从源码中也可以看到,作者也认为正常的value大小是应该在1m以上的),需要从业务上减小value的大小,保证redis的性能)
            newlen += SDS_MAX_PREALLOC;
    }
    //修改sdshdr的len
    type = sdsReqType(newlen);

    /* Don't use type 5: the user is appending to the string and type 5 is
     * not able to remember empty space, so sdsMakeRoomFor() must be called
     * at every appending operation. */
    //上述也说了,假如字符串追加发生了realloc,那么需要将新的地址返回,这里的注释也说明了这一点
    if (type == SDS_TYPE_5) type = SDS_TYPE_8;
    //之后的操作就是将sdshdr复制一份,给到新的sdshdr里,之后将sds地址返回给调用方,避免sds地址丢失
    hdrlen = sdsHdrSize(type);
    assert(hdrlen + newlen + 1 > len);  /* Catch size_t overflow */
    if (oldtype==type) {
        newsh = s_realloc_usable(sh, hdrlen+newlen+1, &usable);
        if (newsh == NULL) return NULL;
        s = (char*)newsh+hdrlen;
    } else {
        /* Since the header size changes, need to move the string forward,
         * and can't use realloc */
        newsh = s_malloc_usable(hdrlen+newlen+1, &usable);
        if (newsh == NULL) return NULL;
        memcpy((char*)newsh+hdrlen, s, len+1);
        s_free(sh);
        s = (char*)newsh+hdrlen;
        s[-1] = type;
        sdssetlen(s, len);
    }
    usable = usable-hdrlen-1;
    if (usable > sdsTypeMaxSize(type))
        usable = sdsTypeMaxSize(type);
    sdssetalloc(s, usable);
    return s;
}

上述源码中这个问题的答案:为什么redis都说1m以上的是大key,为什么不是500k 800k?
sds的惰性释放空间策略:

/* Modify an sds string in-place to make it empty (zero length).
 * However all the existing buffer is not discarded but set as free space
 * so that next append operations will not require allocations up to the
 * number of bytes previously available. */
void sdsclear(sds s) {
    sdssetlen(s, 0);
    s[0] = '\0';
}

至此,redis的sds部分结束~