读书人

android系统启动流程起动画面学习之in

发布时间: 2012-09-19 13:43:54 作者: rapoo

android系统启动流程启动画面学习之init和init.rc分析

这段时间,断断续续的忙了一阵,因为父亲的去世也不情愿的休息了20来天,一点也没有办法。回来后重新开始学习android的启动流程。对android系统级别的学习,阅读代码成为了唯一的办法,不像应用程序开发来得那么明了快捷。之前花了好多时间才对android的binder驱动做了一定的了解,最近几天从android的启动画面,分析到了init这个内核最先启动的一个进程。参考内容包括老罗的android之旅和邓平凡老师的深入理解android卷,本人只是对学习做一定的总结,帮助自己进一步理解。

一 . init.c中的main函数(路径:system/core/init/init.c)

先给出main的源码,然后对个别关键函数进行分析

int main(int argc, char **argv){    int fd_count = 0;    struct pollfd ufds[4];    char *tmpdev;    char* debuggable;    char tmp[32];    int property_set_fd_init = 0;    int signal_fd_init = 0;    int keychord_fd_init = 0;    if (!strcmp(basename(argv[0]), "ueventd"))        return ueventd_main(argc, argv);    /* clear the umask */    umask(0);        /* Get the basic filesystem setup we need put         * together in the initramdisk on / and then we'll         * let the rc file figure out the rest.         */    mkdir("/dev", 0755);    mkdir("/proc", 0755);    mkdir("/sys", 0755);    mount("tmpfs", "/dev", "tmpfs", 0, "mode=0755");    mkdir("/dev/pts", 0755);    mkdir("/dev/socket", 0755);    mount("devpts", "/dev/pts", "devpts", 0, NULL);    mount("proc", "/proc", "proc", 0, NULL);    mount("sysfs", "/sys", "sysfs", 0, NULL);        /* We must have some place other than / to create the         * device nodes for kmsg and null, otherwise we won't         * be able to remount / read-only later on.         * Now that tmpfs is mounted on /dev, we can actually         * talk to the outside world.         
*/    open_devnull_stdio();    log_init();        INFO("reading config file\n");    init_parse_config_file("/init.rc");    /* pull the kernel commandline and ramdisk properties file in */    import_kernel_cmdline(0);    get_hardware_name(hardware, &revision);    snprintf(tmp, sizeof(tmp), "/init.%s.rc", hardware);//和平台硬件hardware有关系    init_parse_config_file(tmp);    action_for_each_trigger("early-init", action_add_queue_tail);    queue_builtin_action(wait_for_coldboot_done_action, "wait_for_coldboot_done");    queue_builtin_action(property_init_action, "property_init");    queue_builtin_action(keychord_init_action, "keychord_init");    queue_builtin_action(console_init_action, "console_init");//第二个开机画面显示函数    queue_builtin_action(set_init_properties_action, "set_init_properties");        /* execute all the boot actions to get us started */    action_for_each_trigger("init", action_add_queue_tail);    action_for_each_trigger("early-fs", action_add_queue_tail);    action_for_each_trigger("fs", action_add_queue_tail);    action_for_each_trigger("post-fs", action_add_queue_tail);    queue_builtin_action(property_service_init_action, "property_service_init");    queue_builtin_action(signal_init_action, "signal_init");    queue_builtin_action(check_startup_action, "check_startup");    /* execute all the boot actions to get us started */    action_for_each_trigger("early-boot", action_add_queue_tail);    action_for_each_trigger("boot", action_add_queue_tail);//把boot这个action添加到action_queue链表中        /* run all property triggers based on current state of the properties */    queue_builtin_action(queue_property_triggers_action, "queue_propety_triggers");#if BOOTCHART    queue_builtin_action(bootchart_init_action, "bootchart_init");#endif    for(;;) {        int nr, i, timeout = -1;        execute_one_command(); //检查action_queue列表是否为空,执行action        restart_processes();//检查是否有进程需要重启        if (!property_set_fd_init && get_property_set_fd() > 0) {            ufds[fd_count].fd 
= get_property_set_fd();            ufds[fd_count].events = POLLIN;            ufds[fd_count].revents = 0;            fd_count++;            property_set_fd_init = 1;        }        if (!signal_fd_init && get_signal_fd() > 0) {            ufds[fd_count].fd = get_signal_fd();            ufds[fd_count].events = POLLIN;            ufds[fd_count].revents = 0;            fd_count++;            signal_fd_init = 1;        }        if (!keychord_fd_init && get_keychord_fd() > 0) {            ufds[fd_count].fd = get_keychord_fd();            ufds[fd_count].events = POLLIN;            ufds[fd_count].revents = 0;            fd_count++;            keychord_fd_init = 1;        }        if (process_needs_restart) {            timeout = (process_needs_restart - gettime()) * 1000;            if (timeout < 0)                timeout = 0;        }        if (!action_queue_empty() || cur_action)            timeout = 0;#if BOOTCHART        if (bootchart_count > 0) {            if (timeout < 0 || timeout > BOOTCHART_POLLING_MS)                timeout = BOOTCHART_POLLING_MS;            if (bootchart_step() < 0 || --bootchart_count == 0) {                bootchart_finish();                bootchart_count = 0;            }        }#endif        nr = poll(ufds, fd_count, timeout);        if (nr <= 0)            continue;        for (i = 0; i < fd_count; i++) {            if (ufds[i].revents == POLLIN) {                if (ufds[i].fd == get_property_set_fd())                    handle_property_set_fd();                else if (ufds[i].fd == get_keychord_fd())                    handle_keychord();                else if (ufds[i].fd == get_signal_fd())                    handle_signal();            }        }    }    return 0;}

init作为用户空间第一个启动的进程,需要完成很多的任务。分以下部分内容来分析

1. ueventd进程

if (!strcmp(basename(argv[0]), "ueventd"))
return ueventd_main(argc, argv);

这段代码取出argv中的第一个参数,比如/sbin/ueventd,则basename为ueventd。android系统第一次启动的进程名为init,所以此时ueventd_main不会执行。该函数真正执行是在init启动service ueventd /sbin/ueventd之后:init fork出一个子进程,再用execve启动/sbin/ueventd。实际上/sbin/ueventd是指向init的符号链接,也就是说ueventd进程运行起来后执行的代码还是init.c中的main,因此不同的进程名执行的是相同的main函数。ueventd_main函数的主要功能:在Linux系统中现在都使用uevent机制来管理设备的热插拔事件,让用户空间有权利完成一些设备文件节点的创建。这种机制建立在socket通信机制之上,由用户空间和内核驱动进行交互,详细的机制没有去了解过,它是linux2.6版本中常用的机制。比如驱动中调用device_create等时,会向用户空间报告一个uevent事件,由用户空间的ueventd进程解析后去创建设备节点。

2.init.rc的解析

INFO("reading config file\n");
init_parse_config_file("/init.rc");

init.rc是一个配置文件,内部有许多的语言规则,所有语言会在init_parse_config_file中进行解析。调用流程如下:init_parse_config_file—>read_file—>parse_config.

parse_config源码如下:

static void parse_config(const char *fn, char *s)//s为init.rc中字符串的内容{    struct parse_state state;    char *args[INIT_PARSER_MAXARGS];    int nargs;    nargs = 0;    state.filename = fn;    state.line = 1;    state.ptr = s;    state.nexttoken = 0;    state.parse_line = parse_line_no_op;    for (;;) {        switch (next_token(&state)) {        case T_EOF:       //文件的结尾            state.parse_line(&state, 0, 0);            return;        case T_NEWLINE://新的一行            if (nargs) {                int kw = lookup_keyword(args[0]);          //读取init.rc返回关键字例如service,返回K_service                if (kw_is(kw, SECTION)) {                        //查看关键字是否为SECTION,只有service和on满足                    state.parse_line(&state, 0, 0);                    parse_new_section(&state, kw, nargs, args);                } else {                    state.parse_line(&state, nargs, args);//on 和service两个段下面的内容                }                nargs = 0;            }            break;        case T_TEXT://文本内容            if (nargs < INIT_PARSER_MAXARGS) {                args[nargs++] = state.text;            }            break;        }    }}int init_parse_config_file(const char *fn){    char *data;    data = read_file(fn, 0);    if (!data) return -1;    parse_config(fn, data);    DUMP();    return 0;}

这个函数中可以看到,在for的无限循环中主要对init.rc的内容进行解析,一行一行地读取。每读取完一行内容、换到下一行时,使用lookup_keyword分析已经读取的这一行的第一个参数,部分代码如下:

case 's':        if (!strcmp(s, "ervice")) return K_service;        if (!strcmp(s, "etenv")) return K_setenv;        if (!strcmp(s, "etkey")) return K_setkey;        if (!strcmp(s, "etprop")) return K_setprop;        if (!strcmp(s, "etrlimit")) return K_setrlimit;        if (!strcmp(s, "ocket")) return K_socket;        if (!strcmp(s, "tart")) return K_start;        if (!strcmp(s, "top")) return K_stop;        if (!strcmp(s, "ymlink")) return K_symlink;        if (!strcmp(s, "ysclktz")) return K_sysclktz;

该函数主要对每一行的第一个字符做case,然后再用strcmp比较关键字剩余的部分,这些关键字都是按init.rc的格式要求来定义的。比如常用的service和on等经过lookup_keyword后返回K_service和K_on。随后使用kw_is(kw, SECTION)判断返回的kw是不是属于SECTION类型,在init.rc中只有service和on满足该类型,这样就会对on和service所在的段进行解析。我们这里首先分析service,以init.rc中的service zygote为例:

service zygote /system/bin/app_process -Xzygote /system/bin --zygote --start-system-server    class main    socket zygote stream 666    onrestart write /sys/android_power/request_state wake    onrestart write /sys/power/state on    onrestart restart media    onrestart restart netd

当解析到这段代码时,执行parse_service

static void *parse_service(struct parse_state *state, int nargs, char **args){    struct service *svc;    if (nargs < 3) {        parse_error(state, "services must have a name and a program\n");        return 0;    }    if (!valid_name(args[1])) {        parse_error(state, "invalid service name '%s'\n", args[1]);        return 0;    }    svc = service_find_by_name(args[1]);//查找服务是否已经存在    if (svc) {        parse_error(state, "ignored duplicate definition of service '%s'\n", args[1]);        return 0;    }    nargs -= 2;    svc = calloc(1, sizeof(*svc) + sizeof(char*) * nargs);    if (!svc) {        parse_error(state, "out of memory\n");        return 0;    }    svc->name = args[1];          //sevice的名字    svc->classname = "default"; //svc的类名默认是default    memcpy(svc->args, args + 2, sizeof(char*) * nargs);//首个参数放的是可执行文件    svc->args[nargs] = 0;    svc->nargs = nargs;//参数个数    svc->onrestart.name = "onrestart";    list_init(&svc->onrestart.commands);    list_add_tail(&service_list, &svc->slist);    return svc;}

在这里args[1]就是zygote,系统会先查找是否已经存在该服务,然后构建一个service svc,进行相关的填充,包括服务名,服务所属的类别名字,以及服务启动带入的参数个数(要减去service和服务名zygote),最后将这个svc加入到service_list全局链表中。随后所做的是对service下面几行Option进行解析,比如class,socket,onrestart等等。使用的是parse_line_service函数,如下:

static void parse_line_service(struct parse_state *state, int nargs, char **args){    struct service *svc = state->context;    struct command *cmd;    int i, kw, kw_nargs;    if (nargs == 0) {        return;    }    svc->ioprio_class = IoSchedClass_NONE;    kw = lookup_keyword(args[0]);    switch (kw) {    case K_capability:        break;    case K_class:        if (nargs != 2) {            parse_error(state, "class option requires a classname\n");        } else {            svc->classname = args[1];//比如main,core类        }        break;   case K_console:        svc->flags |= SVC_CONSOLE;        break;    case K_disabled:        svc->flags |= SVC_DISABLED;...... case K_onrestart:        nargs--;        args++;        kw = lookup_keyword(args[0]);        if (!kw_is(kw, COMMAND)) {            parse_error(state, "invalid command '%s'\n", args[0]);            break;        }        kw_nargs = kw_nargs(kw);        if (nargs < kw_nargs) {            parse_error(state, "%s requires %d %s\n", args[0], kw_nargs - 1,                kw_nargs > 2 ? "arguments" : "argument");            break;        }        cmd = malloc(sizeof(*cmd) + sizeof(char*) * nargs);        cmd->func = kw_func(kw);        cmd->nargs = nargs;        memcpy(cmd->args, args, sizeof(char*) * nargs);        list_add_tail(&svc->onrestart.commands, &cmd->clist);        break;.......}

这里以class这个keyword为例,会将当前class选项所属的svc的类名由默认的default改为main,socket和onrestart的处理与之类似。

到此为止整个service都解析完成 ,开始下一个section的内容。但是zygote这个服务进程的启动还没有开始,将在下面分析。

下面分析on字段的内容,以on boot这个section作为例子进行分析

on boot  ifup lo    hostname localhost    domainname localdomain....# Set this property so surfaceflinger is not started by system_init    setprop system_init.startsurfaceflinger 0    class_start core    class_start main

和前面的分析相类似,case中进入K_on选项,执行函数parse_action

static void *parse_action(struct parse_state *state, int nargs, char **args){    struct action *act;    if (nargs < 2) {        parse_error(state, "actions must have a trigger\n");        return 0;    }    if (nargs > 2) {        parse_error(state, "actions may not have extra parameters\n");        return 0;    }    act = calloc(1, sizeof(*act));    act->name = args[1];  //action的名字如boot,init等    list_init(&act->commands);    list_add_tail(&action_list, &act->alist);        /* XXX add to hash */    return act;}

在这里可以看到一个action结构体类似于service,这个action的名字为boot,最后会将这个action加入到全局链表action_list中。

随后执行parse_line_action函数,对on字段所在的option进行解析,代码如下:

static void parse_line_action(struct parse_state* state, int nargs, char **args) //action所在的行{    struct command *cmd;    struct action *act = state->context;//on boot启动    int (*func)(int nargs, char **args);    int kw, n;    if (nargs == 0) {        return;    }    kw = lookup_keyword(args[0]);//命令的参数个数    if (!kw_is(kw, COMMAND)) {        parse_error(state, "invalid command '%s'\n", args[0]);        return;    }    n = kw_nargs(kw);    if (nargs < n) {        parse_error(state, "%s requires %d %s\n", args[0], n - 1,            n > 2 ? "arguments" : "argument");        return;    }    cmd = malloc(sizeof(*cmd) + sizeof(char*) * nargs);    cmd->func = kw_func(kw);    cmd->nargs = nargs;    memcpy(cmd->args, args, sizeof(char*) * nargs);    list_add_tail(&act->commands, &cmd->clist);   //

这里以class_start main为例,该关键字的定义为 KEYWORD(class_start, COMMAND, 1, do_class_start)。解析时会填充一个command结构体,包括这个cmd的执行函数(class_start对应的func为do_class_start),该关键字要求的参数个数为1。同时将这个cmd添加到action的commands链表中。本文的on boot例子中将会出现2个这样的cmd(class_start core和class_start main)。

至此,on和service两个section已经举例子分析完成。

3 下面继续分析main函数中的queue_builtin_action和action_for_each_trigger。

queue_builtin_action(console_init_action, "console_init");//第二个开机画面显示函数
该函数实现将console_init这个action添加到action_queue全局链表中。

action_for_each_trigger("boot", action_add_queue_tail);//把boot这个action添加到action_queue链表中

/*
 * Call func on every action in action_list whose name equals trigger.
 *
 * trigger - trigger name to look for, e.g. "boot"
 * func    - callback invoked with each matching action
 */
void action_for_each_trigger(const char *trigger,
                             void (*func)(struct action *act))
{
    struct listnode *cursor;

    list_for_each(cursor, &action_list) {
        struct action *candidate = node_to_item(cursor, struct action, alist);

        /* skip actions registered for a different trigger */
        if (strcmp(candidate->name, trigger) != 0)
            continue;

        func(candidate);
    }
}

在该函数中,首先遍历action_list链表,逐个取出action,看是否有名字叫boot的trigger存在。我们知道刚才在解析init.rc中的on boot时,将boot作为action的name加入到了action_list中去,所以可以找到这个boot的action。成功匹配后调用action_add_queue_tail,将这个action再加入到action_queue中,等待着执行。

4 for(;;)循环中执行execute_one_command

/*
 * Run at most one command of the queued actions.
 *
 * If there is no current action or command, or the current command is the
 * last one of its action, the next action is taken from the head of the
 * queue and its first command is selected.  Otherwise the command following
 * the current one is selected.  The selected command's handler is called
 * and its result is logged.
 */
void execute_one_command(void)
{
    int result;
    int need_next_action = !cur_action || !cur_command ||
                           is_last_command(cur_action, cur_command);

    if (need_next_action) {
        cur_action = action_remove_queue_head();
        cur_command = NULL;
        if (cur_action == NULL)
            return;     /* nothing is queued */

        INFO("processing action %p (%s)\n", cur_action, cur_action->name);
        cur_command = get_first_command(cur_action);
    } else {
        cur_command = get_next_command(cur_action, cur_command);
    }

    if (cur_command == NULL)
        return;

    /* invoke the handler, e.g. do_class_start for "class_start" */
    result = cur_command->func(cur_command->nargs, cur_command->args);
    INFO("command '%s' r=%d\n", cur_command->args[0], result);
}

使用action_remove_queue_head获取action_queue链表中的action后,移除该节点,使用get_first_command获得在action中的命令,比如这里出现的boot和console_init这两个action。针对console_init启动console_init_action这个函数。如果是boot则会对boot这个action所具有的commands链表进行cmd的获取,class_start的func指针函数为do_class_start:

/*
 * Handler for the "class_start <class>" command.
 *
 * args[1] is the class name.  service_start_if_not_disabled is passed to
 * service_for_each_class() for that class.  Always returns 0.
 */
int do_class_start(int nargs, char **args)
{
    char *class_name = args[1];

    /* Starting a class does not start services
     * which are explicitly disabled.  They must
     * be started individually.
     */
    service_for_each_class(class_name, service_start_if_not_disabled);

    return 0;
}

可以看到提取了命令行的第二个参数,如main,core等。在service_for_each_class中遍历service_list查找属于该类的service,如我们前面提到的zygote,查找到后执行service_start_if_not_disabled——>service_start。至此我们进入了启动service的代码

void service_start(struct service *svc, const char *dynamic_args){    struct stat s;    pid_t pid;    int needs_console;    int n;        /* starting a service removes it from the disabled         * state and immediately takes it out of the restarting         * state if it was in there         */    svc->flags &= (~(SVC_DISABLED|SVC_RESTARTING));    svc->time_started = 0;//服务的启动时间设为0             /* running processes require no additional work -- if         * they're in the process of exiting, we've ensured         * that they will immediately restart on exit, unless         * they are ONESHOT         */    if (svc->flags & SVC_RUNNING) {        return;    }    needs_console = (svc->flags & SVC_CONSOLE) ? 1 : 0;    if (needs_console && (!have_console)) {        ERROR("service '%s' requires console\n", svc->name);        svc->flags |= SVC_DISABLED;        return;    }    if (stat(svc->args[0], &s) != 0) {   //通过文件名获取文件信息保存到s的buf中        ERROR("cannot find '%s', disabling '%s'\n", svc->args[0], svc->name);        svc->flags |= SVC_DISABLED;        return;    }    if ((!(svc->flags & SVC_ONESHOT)) && dynamic_args) {        ERROR("service '%s' must be one-shot to use dynamic args, disabling\n",               svc->args[0]);        svc->flags |= SVC_DISABLED;        return;    }    NOTICE("starting '%s'\n", svc->name);    pid = fork();//创建子进程    if (pid == 0) {   //子进程        struct socketinfo *si;        struct svcenvinfo *ei;        char tmp[32];        int fd, sz;        if (properties_inited()) {            get_property_workspace(&fd, &sz);            sprintf(tmp, "%d,%d", dup(fd), sz);            add_environment("ANDROID_PROPERTY_WORKSPACE", tmp);        }        for (ei = svc->envvars; ei; ei = ei->next)            add_environment(ei->name, ei->value);        for (si = svc->sockets; si; si = si->next) {            int socket_type = (                    !strcmp(si->type, "stream") ? SOCK_STREAM :                        (!strcmp(si->type, "dgram") ? 
SOCK_DGRAM : SOCK_SEQPACKET));            int s = create_socket(si->name, socket_type,                                  si->perm, si->uid, si->gid);//创建套接字            if (s >= 0) {                publish_socket(si->name, s);            }        }        if (svc->ioprio_class != IoSchedClass_NONE) {            if (android_set_ioprio(getpid(), svc->ioprio_class, svc->ioprio_pri)) {                ERROR("Failed to set pid %d ioprio = %d,%d: %s\n",                      getpid(), svc->ioprio_class, svc->ioprio_pri, strerror(errno));            }        }        if (needs_console) {            setsid();            open_console();        } else {            zap_stdio();        }#if 0        for (n = 0; svc->args[n]; n++) {            INFO("args[%d] = '%s'\n", n, svc->args[n]);        }        for (n = 0; ENV[n]; n++) {            INFO("env[%d] = '%s'\n", n, ENV[n]);        }#endif        setpgid(0, getpid());    /* as requested, set our gid, supplemental gids, and uid */        if (svc->gid) {            setgid(svc->gid);        }        if (svc->nr_supp_gids) {            setgroups(svc->nr_supp_gids, svc->supp_gids);        }        if (svc->uid) {            setuid(svc->uid);        }        if (!dynamic_args) {            if (execve(svc->args[0], (char**) svc->args, (char**) ENV) < 0) {                ERROR("cannot execve('%s'): %s\n", svc->args[0], strerror(errno));//执行服务的可执行文件            }        } else {            char *arg_ptrs[INIT_PARSER_MAXARGS+1];            int arg_idx = svc->nargs;            char *tmp = strdup(dynamic_args);            char *next = tmp;            char *bword;            /* Copy the static arguments */            memcpy(arg_ptrs, svc->args, (svc->nargs * sizeof(char *)));            while((bword = strsep(&next, " "))) {                arg_ptrs[arg_idx++] = bword;                if (arg_idx == INIT_PARSER_MAXARGS)                    break;            }            arg_ptrs[arg_idx] = '\0';            execve(svc->args[0], (char**) arg_ptrs, 
(char**) ENV);        }        _exit(127);    }    if (pid < 0) {        ERROR("failed to start '%s'\n", svc->name);        svc->pid = 0;        return;    }    svc->time_started = gettime();    svc->pid = pid;    svc->flags |= SVC_RUNNING;    if (properties_inited())        notify_service_state(svc->name, "running");}

分析这段代码,主要内容:

a.检查当前service如zygote的flag,即SVC_RUNNING(服务运行中),SVC_DISABLED等

b.fork一个子进程,子进程中会建立一个socket用于通信,同时使用if (execve(svc->args[0], (char**) svc->args, (char**) ENV) < 0)执行zygote对应的可执行文件,至此service zygote真正的启动。

到这里为止,对android系统的init启动有了清晰的了解,对init如何启动adbd,zygote等service有了一定的了解,以及对init.rc有了清晰的认识。init中还有部分内容等着后续几天做一定的学习。

补充:service进程的重启在restart_processes中进行,它会重启flag为SVC_RESTARTING的服务。这部分进程的重启其实在init中由handle_signal来管理,一旦出现service崩溃,poll函数会接收到相关文件变化的信息,执行handle_signal中的wait_for_one_process

static int wait_for_one_process(int block){    pid_t pid;    int status;    struct service *svc;    struct socketinfo *si;    time_t now;    struct listnode *node;    struct command *cmd;    while ( (pid = waitpid(-1, &status, block ? 0 : WNOHANG)) == -1 && errno == EINTR );    if (pid <= 0) return -1;    INFO("waitpid returned pid %d, status = %08x\n", pid, status);    svc = service_find_by_pid(pid);    if (!svc) {        ERROR("untracked pid %d exited\n", pid);        return 0;    }..... svc->flags |= SVC_RESTARTING;    /* Execute all onrestart commands for this service. */    list_for_each(node, &svc->onrestart.commands) {        cmd = node_to_item(node, struct command, clist);        cmd->func(cmd->nargs, cmd->args);    }    notify_service_state(svc->name, "restarting");    return 0;}

该函数使用waitpid,找到退出的子进程的进程号pid,然后查找到该service,执行service中onrestart这个commands链表里的命令,如restart media等。同时将service的flag设置为SVC_RESTARTING,这样就结合前面讲到的restart_processes重新启动该服务进程。







读书人网 >Android

热点推荐