博客年龄:17年10个月
访问:?
文章:291篇

个人描述

姓名:丹臣 职业:DBA 公司:TAOBAO Mail:zhaolinjnu(at)163.com MSN:echo_lin@hotmail.com 微博http://twitter.com/zhaolinjnu

关于innodb_open_files,以及tablespace_memory_cache

2011-08-09 18:36 阅读(?)评论(0)

     

      前几天因为一次故障,怀疑是不是innodb_open_files参数设置过小的问题,下面的一些代码可以作出一些解释。总体来说,这是一个软限制,但对于使用独立表空间、文件比较多、并发比较大的数据库,建议还是把这个参数调大,尽量减少不必要的反复淘汰node、打开文件、加入LRU的过程。感兴趣的同学可以看看下面整理的一些代码。

 

      先来看一个重要的结构体fil_system_struct,在mysql innodb里,它是一个tablespace memory cache。我们来看看它的定义,以及它的一些成员变量:

/** The tablespace memory cache */

typedef struct fil_system_struct    fil_system_t;

 

/** The tablespace memory cache; also the totality of logs (the log
data space) is stored here; below we talk about tablespaces, but also
the ib_logfiles form a 'space' and it is handled here.

A single instance is allocated and fully initialized by fil_init() and
published through the file-scope pointer fil_system. */
struct fil_system_struct {
#ifndef UNIV_HOTBACKUP
    mutex_t     mutex;      /*!< The mutex protecting the cache */
#endif /* !UNIV_HOTBACKUP */
    hash_table_t*   spaces;     /*!< The hash table of spaces in the
                    system; they are hashed on the space
                    id */
    hash_table_t*   name_hash;  /*!< hash table based on the space
                    name */
    UT_LIST_BASE_NODE_T(fil_node_t) LRU;
                    /*!< base node for the LRU list of the
                    most recently used open files with no
                    pending i/o's; if we start an i/o on
                    the file, we first remove it from this
                    list, and return it to the start of
                    the list when the i/o ends;
                    log files and the system tablespace are
                    not put to this list: they are opened
                    after the startup, and kept open until
                    shutdown.
                    fil_node_open_file() adds a node at the
                    head only when space->purpose ==
                    FIL_TABLESPACE and space->id != 0;
                    fil_try_to_close_file_in_LRU() looks for
                    a file to close starting from the tail */
    UT_LIST_BASE_NODE_T(fil_space_t) unflushed_spaces;
                    /*!< base node for the list of those
                    tablespaces whose files contain
                    unflushed writes; those spaces have
                    at least one file node where
                    modification_counter > flush_counter */
    ulint       n_open;     /*!< number of files currently open */
    ulint       max_n_open; /*!< n_open is not allowed to exceed
                    this; set from the max_n_open argument
                    of fil_init(). In the code shown this
                    behaves as a soft limit:
                    fil_node_prepare_for_io() only prints a
                    warning once n_open > max_n_open + 5 */
    ib_int64_t  modification_counter;/*!< when we write to a file we
                    increment this by one */
    ulint       max_assigned_id;/*!< maximum space id in the existing
                    tables, or assigned during the time
                    mysqld has been up; at an InnoDB
                    startup we scan the data dictionary
                    and set here the maximum of the
                    space id's of the tables there */
    ib_int64_t  tablespace_version;
                    /*!< a counter which is incremented for
                    every space object memory creation;
                    every space mem object gets a
                    'timestamp' from this; in DISCARD/
                    IMPORT this is used to check if we
                    should ignore an insert buffer merge
                    request */
    UT_LIST_BASE_NODE_T(fil_space_t) space_list;
                    /*!< list of all file spaces */
};

 

/** The tablespace memory cache. This variable is NULL before the module is initialized. */

static fil_system_t*    fil_system  = NULL;

 

 

innobase在启动的时候,会初始化一些参数,其中包括srv_max_n_open_files,这个参数有一个初始值,定义为UNIV_INTERN ulint   srv_max_n_open_files = 300;

 

/********************************************************************

Starts InnoDB and creates a new database if database files

are not found and the user wants.

@return DB_SUCCESS or error code */

UNIV_INTERN

int

innobase_start_or_create_for_mysql(void)

/*====================================*/

    fil_init(srv_file_per_table ? 50000 : 5000,srv_max_n_open_files);

 

 

我们看一下具体的fil_init函数初始化具体会做些什么?主要是为fil_system结构体申请相应的空间,以及给一些成员变量赋值。

/****************************************************************//**
Creates the tablespace memory cache (the file-scope fil_system object)
and gives every one of its members an initial value.

Asserts that the cache has not been created yet (fil_system == NULL)
and that both arguments are non-zero. */
UNIV_INTERN
void
fil_init(
/*=====*/
    ulint   hash_size,  /*!< in: size passed to hash_create() for
                both lookup hash tables */
    ulint   max_n_open) /*!< in: max number of open files; stored
                in fil_system->max_n_open */
{
    ut_a(fil_system == NULL);
    ut_a(hash_size > 0);
    ut_a(max_n_open > 0);

    fil_system = mem_alloc(sizeof(*fil_system));

    mutex_create(&fil_system->mutex, SYNC_ANY_LATCH);

    /* Open-file accounting: nothing is open yet. */
    fil_system->max_n_open = max_n_open;
    fil_system->n_open = 0;

    /* All counters start from zero. */
    fil_system->modification_counter = 0;
    fil_system->tablespace_version = 0;
    fil_system->max_assigned_id = 0;

    /* The two hash tables exist for lookups: by space id and by
    space name. */
    fil_system->spaces = hash_create(hash_size);
    fil_system->name_hash = hash_create(hash_size);

    /* The lists start out empty. The LRU list is the one used when
    choosing an open file to close (evict). */
    UT_LIST_INIT(fil_system->space_list);
    UT_LIST_INIT(fil_system->unflushed_spaces);
    UT_LIST_INIT(fil_system->LRU);
}

 

 

我们来看fil_mutex_enter_and_prepare_for_io函数,它会检查当前打开的文件数有没有达到定义的最大值(n_open >= max_n_open),如果达到了,会尝试关闭LRU链表中的文件。如果我们采用独立的表空间,表又比较多,并发也比较大,就需要把此参数调大,这样可以缓存更多的表空间信息到tablespace memory cache。但这个参数不能理解成一个硬限制,实际打开的文件数是可以超过innodb_open_files的。

root@(none) 12:47:33>show variables like '%innodb_open%';

+-------------------+-------+

| Variable_name     | Value |

+-------------------+-------+

| innodb_open_files | 300   |

+-------------------+-------+

1 row in set (0.01 sec)

 

 

/*******************************************************************//**

Reserves the fil_system mutex and tries to make sure we can open at least one

file while holding it. This should be called before calling

fil_node_prepare_for_io(), because that function may need to open a file. */

static

void

fil_mutex_enter_and_prepare_for_io(

/*===============================*/

    ulint   space_id)   /*!< in: space id */

{

    fil_space_t*    space;

    ibool       success;

    ibool       print_info  = FALSE;

    ulint       count       = 0;

    ulint       count2      = 0;

 

retry:

    mutex_enter(&fil_system->mutex);

 

    if (space_id == 0 || space_id >= SRV_LOG_SPACE_FIRST_ID) {

        /* We keep log files and system tablespace files always open;

        this is important in preventing deadlocks in this module, as

        a page read completion often performs another read from the

        insert buffer. The insert buffer is in tablespace 0, and we

        cannot end up waiting in this function. */

 

        return;

    }

 

    if (fil_system->n_open < fil_system->max_n_open) {

        return;

    }

 

    space = fil_space_get_by_id(space_id);

 

    if (space != NULL && space->stop_ios) {

        /* We are going to do a rename file and want to stop new i/o's

        for a while */

 

        if (count2 > 20000) {

            fputs("InnoDB: Warning: tablespace ", stderr);

            ut_print_filename(stderr, space->name);

            fprintf(stderr,

                " has i/o ops stopped for a long time %lu\n",

                (ulong) count2);

        }

 

        mutex_exit(&fil_system->mutex);

        os_thread_sleep(20000);

        count2++;

        goto retry;

    }

 

    /* If the file is already open, no need to do anything; if the space

    does not exist, we handle the situation in the function which called

    this function */

 

    if (!space || UT_LIST_GET_FIRST(space->chain)->open) {

 

        return;

    }

 

    if (count > 1) {

        print_info = TRUE;

    }

 

/* Too many files are open, try to close some */

close_more:

    success = fil_try_to_close_file_in_LRU(print_info);

    if (success && fil_system->n_open >= fil_system->max_n_open) {

        goto close_more;

    }

 

    if (fil_system->n_open < fil_system->max_n_open) {

        /* Ok */

        return;

    }

…后面代码省略

 

在fil_node_prepare_for_io函数中,也有关于打开最大文件数的判断,但也只是一个软限制,会打出Warning,不是硬限制.

Prepares a file node for i/o. Opens the file if it is closed. Updates the

pending i/o's field in the node and the system appropriately. Takes the node

off the LRU list if it is in the LRU list. The caller must hold the fil_sys

mutex. */

static

void

fil_node_prepare_for_io(

/*====================*/

    fil_node_t* node,   /*!< in: file node */

    fil_system_t*   system, /*!< in: tablespace memory cache */

    fil_space_t*    space)  /*!< in: space */

{

    ut_ad(node && system && space);

    ut_ad(mutex_own(&(system->mutex)));

 

    if (system->n_open > system->max_n_open + 5) {

        ut_print_timestamp(stderr);

        fprintf(stderr,

            "  InnoDB: Warning: open files %lu"

            " exceeds the limit %lu\n",

            (ulong) system->n_open,

            (ulong) system->max_n_open);

    }

 

    if (node->open == FALSE) {

        /* File is closed: open it */

        ut_a(node->n_pending == 0);

 

        fil_node_open_file(node, system, space);

    }

 

    if (node->n_pending == 0 && space->purpose == FIL_TABLESPACE

        && space->id != 0) {

        /* The node is in the LRU list, remove it */

        //大家可以想想,这里为什么要移除?

        ut_a(UT_LIST_GET_LEN(system->LRU) > 0);

        UT_LIST_REMOVE(LRU, system->LRU, node);

    }

 

    node->n_pending++;

}

 

 

Mysql innodb的系统表空间数据文件,以及innodb日志文件,是会一直打开的,并缓存在tablespace memory cache当中,这些不会被淘汰,这一点又是如何做到的呢?

/*******************************************************************//**

Opens all log files and system tablespace data files. They stay open until the

database server shutdown. This should be called at a server startup after the

space objects for the log and the system tablespace have been created. The

purpose of this operation is to make sure we never run out of file descriptors

if we need to read from the insert buffer or to write to the log. */

UNIV_INTERN

void

fil_open_log_and_system_tablespace_files(void)

/*==========================================*/

{

    fil_space_t*    space;

    fil_node_t* node;

 

    mutex_enter(&fil_system->mutex);

 

    space = UT_LIST_GET_FIRST(fil_system->space_list);

 

    while (space != NULL) {

        if (space->purpose != FIL_TABLESPACE || space->id == 0) {

            node = UT_LIST_GET_FIRST(space->chain);

 

            while (node != NULL) {

                if (!node->open) {

                    fil_node_open_file(node, fil_system,

                               space);

                }

                if (fil_system->max_n_open

                    < 10 + fil_system->n_open) {

                    fprintf(stderr,

                        "InnoDB: Warning: you must"

                        " raise the value of"

                        " innodb_open_files in\n"

                        "InnoDB: my.cnf! Remember that"

                        " InnoDB keeps all log files"

                        " and all system\n"

                        "InnoDB: tablespace files open"

                        " for the whole time mysqld is"

                        " running, and\n"

                        "InnoDB: needs to open also"

                        " some .ibd files if the"

                        " file-per-table storage\n"

                        "InnoDB: model is used."

                        " Current open files %lu,"

                        " max allowed"

                        " open files %lu.\n",

                        (ulong) fil_system->n_open,

                        (ulong) fil_system->max_n_open);

                }

                node = UT_LIST_GET_NEXT(chain, node);

            }

        }

        space = UT_LIST_GET_NEXT(space_list, space);

    }

 

    mutex_exit(&fil_system->mutex);

}

 

因为我们在打开一个文件的时候,会作一个判断,只有非系统表空间的数据文件才会加入到LRU淘汰链表:

/********************************************************************//**

Opens a the file of a node of a tablespace. The caller must own the fil_system

mutex. */

static

void

fil_node_open_file(

/*===============*/

    fil_node_t* node,   /*!< in: file node */

    fil_system_t*   system, /*!< in: tablespace memory cache */

    fil_space_t*    space)  /*!< in: space */

 

    if (space->purpose == FIL_TABLESPACE && space->id != 0) {

        /* Put the node to the LRU list */

        UT_LIST_ADD_FIRST(LRU, system->LRU, node);

    }

}

 

在淘汰一个普通的文件node的时候,会从LRU链表的末尾开始选取node,并检查它是否还有未完成的flush(node->n_pending_flushes > 0),或者是否存在尚未刷盘的修改(node->modification_counter != node->flush_counter)。只要这两种情况中有任何一种成立,该node就不能被关闭,循环会继续向链表头部方向查找,直到找到一个可以关闭的node为止。如果当前所有的文件都不符合条件,最终也会退出循环并返回FALSE,即一个也不淘汰。

/********************************************************************//**
Tries to close one file from the LRU list, scanning from the tail of the
list towards its head and closing the first node that has no unflushed
modifications and no pending flushes. The caller must hold the fil_sys
mutex.
@return TRUE if a file was closed, FALSE if should retry later; since
i/o's generally complete in < 100 ms, and as InnoDB writes at most 128
pages from the buffer pool in a batch, and then immediately flushes the
files, there is a good chance that the next time we find a suitable
node from the LRU list */
static
ibool
fil_try_to_close_file_in_LRU(
/*=========================*/
    ibool   print_info) /*!< in: if TRUE, prints information why it
                cannot close a file */
{
    fil_node_t* candidate;

    ut_ad(mutex_own(&fil_system->mutex));

    if (print_info) {
        fprintf(stderr,
            "InnoDB: fil_sys open file LRU len %lu\n",
            (ulong) UT_LIST_GET_LEN(fil_system->LRU));
    }

    for (candidate = UT_LIST_GET_LAST(fil_system->LRU);
         candidate != NULL;
         candidate = UT_LIST_GET_PREV(LRU, candidate)) {

        ibool   unflushed = candidate->modification_counter
            != candidate->flush_counter;

        /* Loop exit condition: everything written to this file
        has been flushed and no flush is in progress, so it can
        be closed */
        if (!unflushed && candidate->n_pending_flushes == 0) {

            fil_node_close_file(candidate, fil_system);

            return(TRUE);
        }

        if (!print_info) {
            continue;
        }

        /* Report each reason that keeps this file open */
        if (candidate->n_pending_flushes > 0) {
            fputs("InnoDB: cannot close file ", stderr);
            ut_print_filename(stderr, candidate->name);
            fprintf(stderr, ", because n_pending_flushes %lu\n",
                (ulong) candidate->n_pending_flushes);
        }

        if (unflushed) {
            fputs("InnoDB: cannot close file ", stderr);
            ut_print_filename(stderr, candidate->name);
            fprintf(stderr,
                ", because mod_count %ld != fl_count %ld\n",
                (long) candidate->modification_counter,
                (long) candidate->flush_counter);
        }
    }

    /* No node in the LRU list could be closed */
    return(FALSE);
}

 

对于mysql的许多参数,如果我们从代码的角度,会理解更深一些.

  最后修改于 2011-08-09 18:42    阅读(?)评论(0)
上一篇: 该日志被锁定 下一篇:该日志被锁定
 
表  情:
加载中...
 

请各位遵纪守法并注意语言文明