CDB | 字数总计: 7.5k | 阅读时长: 34分钟 | 阅读量:
CDB 我想学习数据结构,最好的方式就是使用数据结构的知识做出一个系统,而我认为对于目前的我来讲,最好的选择就是做一个小型的数据库引擎。取名为CDB
学习GitHub上的这个 项目。
必要技术准备
windows下文件的交互。参考CRT ,微软的这个官方文档真好。
测试技术,这个在json教程中已经学习过。
层次架构,学会对一个项目使用层次架构进行开发。
命令处理,或者说是解析器,即解析命令。
数据库底层数据结构的实现,这里采用B树。
c语言底层内存管理。
对操作系统底层的了解,比如数据库利用操作系统的分页概念进行优化。
第一步 数据库的交互框架 数据库工作的流程为: 1. 打印提示符接受输入 2. 解析输入,得到真实的命令 3. 将解析得到的语义进行数据库底层操作 4. 数据库底层操作很复杂,现在还不懂,懂了再加上
所以首先我们需要解析每一次的输入,并解析为程序可以读懂的信息,根据解析结果进行进一步操作。
所以我们需要一个缓冲区来存储输入。
/* One line of user input read at the prompt. */
typedef struct {
    char* buffer;          /* heap-allocated text of the current line */
    size_t buffer_length;  /* size bookkeeping handed to getline() */
    size_t input_length;   /* number of characters read for the current line */
} InputBuffer;
在得到每一次的输入之后还首先需要进行元命令判断 其结果使用enum进行选择。
/* Outcome of handling a meta command (an input line that starts with '.'). */
typedef enum {
    META_COMMAND_EXIT,                 /* the user asked to quit */
    META_COMMAND_SUCCESS,              /* the command was handled */
    META_COMMAND_UNRECOGNIZED_COMMAND  /* no such meta command */
} MetaCommandResult;
命令目前分为两类,一类是以 . 开头的非SQL命令(即元命令),另一类为SQL命令。
进行元命令判断之后进行处理,目前只有退出一项。
与元命令并行的是SQL命令的处理,其不以 . 开头。同理,这种命令的处理也存在几种状态。
/* Outcome of parsing an SQL-like input line into a Statement. */
typedef enum {
    PREPARE_SUCCESS,
    PREPARE_SYNTAX_ERROR,
    PREPARE_UNRECOGNIZED_STATEMENT
} PrepareResult;

/* The kinds of statement the engine can execute. */
typedef enum {
    STATEMENT_INSERT,
    STATEMENT_SELECT
} StatementType;

/* A parsed statement, ready to be executed. */
typedef struct {
    StatementType type;
    Row row_to_insert;  /* only meaningful for STATEMENT_INSERT */
} Statement;
程序预处理SQL命令得到这样的状态,根据状态来执行真正的SQL命令。
以上就是数据库的交互框架,就是一个巨大的状态机,虽然我不是很喜欢状态机,可是目前能力不足,只能使用状态机来实现了。希望以后做一个不使用状态机的大项目。
这一步的程序被覆盖掉了。
第二步 数据库的简单插入操作 这一步要实现数据库与底层的互连,打通和底层内存间的交互。实现简单的insert和使用select打印出数据表。这里全部使用硬编码。
下面讲一下数据库的底层安排。
table使用一个简单的示例:
1 2 3 4 5 6 7 8 9 10 11 12 13 typedef struct { uint32_t id; char username[COLUMN_USERNAME_SIZE]; char email[COLUMN_EMAIL_SIZE]; } Row; typedef struct { uint32_t num_rows; void * pages[TABLE_MAX_PAGES]; } Table;
将一张表分为数个page,这里的page和操作系统提供的分页机制相吻合,都是4KB大小,这样可以从操作系统层面上提高数据库的效率。在预处理中已经将命令的参数存入一行中了(statement->row_to_insert),现在需要获得数据要存入的内存地址和存入该内存地址的方法。
分配内存:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 Row* row_slot (Table* table, uint32_t row_num) { uint32_t page_num = row_num / ROWS_PER_PAGE; void * page = table->pages[page_num]; if (page == NULL ) { page = table->pages[page_num] = malloc (PAGE_SIZE); } uint32_t row_offset = row_num % ROWS_PER_PAGE; uint32_t byte_offset = row_offset * ROW_SIZE; return (Row*)((uint8_t *)page + byte_offset); }
这个函数返回一个指向要插入的内存地址的指针。
插入内存:
1 2 3 4 5 6 7 8 9 void serialize_row (Row* source, Row* destination) { memcpy ((uint8_t *)destination + ID_OFFSET, &(source->id), ID_SIZE); memcpy ((uint8_t *)destination + USERNAME_OFFSET, &(source->username), USERNAME_SIZE); memcpy ((uint8_t *)destination + EMAIL_OFFSET, &(source->email), EMAIL_SIZE); }
这里将destination从 Row* 转换为 (uint8_t*),把指针运算的单位转化为字节,这样下面加上各个属性的offset之后就正确地指向对应的内存位置了。memcpy的第三个参数代表复制的字节数。这里终于意识到c语言指针的强大之处了。
同理,还应该有一个相反的函数用于输出数据:
1 2 3 4 5 6 void deserialize_row (Row* source, Row* destination) { memcpy (&(destination->id), (uint8_t *)source + ID_OFFSET, ID_SIZE); memcpy (&(destination->username), (uint8_t *)source + USERNAME_OFFSET, USERNAME_SIZE); memcpy (&(destination->email), (uint8_t *)source + EMAIL_OFFSET, EMAIL_SIZE); }
这样与内存的底层交互就完成了,现在只需要得到offset即可。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 #define COLUMN_USERNAME_SIZE 32 #define COLUMN_EMAIL_SIZE 255 #define TABLE_MAX_PAGES 100 #define PAGE_SIZE 4096 #define size_of_attribute(Struct, Attribute) sizeof(((Struct*)0)->Attribute) const uint32_t ID_SIZE = size_of_attribute (Row, id);const uint32_t USERNAME_SIZE = size_of_attribute (Row, username);const uint32_t EMAIL_SIZE = size_of_attribute (Row, email);const uint32_t ID_OFFSET = 0 ;const uint32_t USERNAME_OFFSET = 0 + size_of_attribute (Row, id);const uint32_t EMAIL_OFFSET = 0 + size_of_attribute (Row, id) + size_of_attribute (Row, username);const uint32_t ROW_SIZE = size_of_attribute (Row, id) + size_of_attribute (Row, username) + size_of_attribute (Row, email);const uint32_t ROWS_PER_PAGE = PAGE_SIZE /(size_of_attribute (Row, id) + size_of_attribute (Row, username) + size_of_attribute (Row, email));const uint32_t TABLE_MAX_ROWS = PAGE_SIZE / (size_of_attribute (Row, id) + size_of_attribute (Row, username) + size_of_attribute (Row, email)) * TABLE_MAX_PAGES;
/* Code listing: CDB.h -- shared declarations for step 2 (in-memory table). */
#ifndef CDB_H
#define CDB_H

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <stdint.h>

/* Maximum number of characters per text column, not counting the NUL. */
#define COLUMN_USERNAME_SIZE 32
#define COLUMN_EMAIL_SIZE 255
#define TABLE_MAX_PAGES 100
#define PAGE_SIZE 4096

/* Size in bytes of one member of a struct type, without needing an instance. */
#define size_of_attribute(Struct, Attribute) sizeof(((Struct*)0)->Attribute)

/* One line of user input read at the prompt. */
typedef struct {
    char* buffer;
    size_t buffer_length;
    size_t input_length;
} InputBuffer;

/* Outcome of handling a meta command (an input line that starts with '.'). */
typedef enum {
    META_COMMAND_EXIT,
    META_COMMAND_SUCCESS,
    META_COMMAND_UNRECOGNIZED_COMMAND
} MetaCommandResult;

/* Outcome of parsing an SQL-like input line. */
typedef enum {
    PREPARE_SUCCESS,
    PREPARE_NEGATIVE_ID,
    PREPARE_SYNTAX_ERROR,
    PREPARE_STRING_TOO_LONG,
    PREPARE_TOO_MANY_PARAMETER,
    PREPARE_UNRECOGNIZED_STATEMENT
} PrepareResult;

/* Outcome of executing a prepared statement. */
typedef enum {
    EXECUTE_SUCCESS,
    EXECUTE_TABLE_FULL
} ExecuteResult;

typedef enum {
    STATEMENT_INSERT,
    STATEMENT_SELECT
} StatementType;

/* One record; the "+ 1" leaves room for each string's terminating NUL. */
typedef struct {
    uint32_t id;
    char username[COLUMN_USERNAME_SIZE + 1];
    char email[COLUMN_EMAIL_SIZE + 1];
} Row;

typedef struct {
    StatementType type;
    Row row_to_insert;  /* only meaningful for STATEMENT_INSERT */
} Statement;

/*
 * Layout of one serialized row: id, username and email packed back to back.
 * The constants are `static` so that CDB.c and main.c can both include this
 * header without defining the same external object twice. Each initializer
 * repeats the sizeof terms because in C one const object cannot initialize
 * another at file scope.
 */
static const uint32_t ID_SIZE = size_of_attribute(Row, id);
static const uint32_t USERNAME_SIZE = size_of_attribute(Row, username);
static const uint32_t EMAIL_SIZE = size_of_attribute(Row, email);
static const uint32_t ID_OFFSET = 0;
static const uint32_t USERNAME_OFFSET = 0 + size_of_attribute(Row, id);
static const uint32_t EMAIL_OFFSET =
    0 + size_of_attribute(Row, id) + size_of_attribute(Row, username);
static const uint32_t ROW_SIZE =
    size_of_attribute(Row, id) + size_of_attribute(Row, username) +
    size_of_attribute(Row, email);
/* Whole rows only: the bytes left over at the end of a page stay unused. */
static const uint32_t ROWS_PER_PAGE =
    PAGE_SIZE / (size_of_attribute(Row, id) + size_of_attribute(Row, username) +
                 size_of_attribute(Row, email));
static const uint32_t TABLE_MAX_ROWS =
    PAGE_SIZE / (size_of_attribute(Row, id) + size_of_attribute(Row, username) +
                 size_of_attribute(Row, email)) * TABLE_MAX_PAGES;

/* The table: a row count plus lazily allocated pages of row data. */
typedef struct {
    uint32_t num_rows;
    void* pages[TABLE_MAX_PAGES];
} Table;

/* Input handling */
InputBuffer* new_input_buffer(void);
void print_prompt(void);
/* NOTE: project-local helper; the name and signature differ from POSIX getline(). */
size_t getline(char** buffer, size_t* n);
void read_input(InputBuffer* input_buffer);
void close_input_buffer(InputBuffer* input_buffer);

/* Command parsing */
MetaCommandResult do_meta_command(InputBuffer* input_buffer);
PrepareResult prepare_statement(InputBuffer* input_buffer, Statement* statement);
void prepare_trim(InputBuffer* input_buffer);
PrepareResult prepare_insert(InputBuffer* input_buffer, Statement* statement);

/* Execution */
ExecuteResult execute_statement(Statement* statement, Table* table);
ExecuteResult execute_insert(Statement* statement, Table* table);
ExecuteResult execute_select(Statement* statement, Table* table);

/* Row storage */
void serialize_row(Row* source, Row* destination);
void deserialize_row(Row* source, Row* destination);
Row* row_slot(Table* table, uint32_t row_num);
Table* new_table(void);
void free_table(Table* table);
void print_row(Row* row);

#endif /* CDB_H */
CDB.c 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 #define _CRT_SECURE_NO_WARNINGS #include "CDB.h" InputBuffer* new_input_buffer () { InputBuffer* input_buffer = (InputBuffer*)malloc (sizeof (InputBuffer)); if (input_buffer) { input_buffer->buffer = NULL ; input_buffer->buffer_length = 0 ; input_buffer->input_length = 0 ; return input_buffer; } exit (EXIT_FAILURE); } void print_prompt () { printf ("CDB >> " ); } size_t getline (char ** buffer, size_t * n) { char c = 0 ; size_t bytes_read = 0 ; uint32_t COLUMN_MAX_SIZE = COLUMN_USERNAME_SIZE + COLUMN_EMAIL_SIZE + 100 ; char * buffer_temp = (char *)malloc (COLUMN_MAX_SIZE * sizeof (char )); if (buffer_temp == NULL ) { exit (EXIT_FAILURE); } while ((c = getchar ()) && c != '\n' ) { *(buffer_temp + bytes_read++) = c; if (bytes_read == COLUMN_MAX_SIZE) { printf ("too long\n" ); exit (EXIT_FAILURE); } } *buffer = buffer_temp; return bytes_read; } void read_input (InputBuffer* input_buffer) { size_t bytes_read = getline (&(input_buffer->buffer), &(input_buffer->buffer_length)); if (bytes_read 
< 0 ) { printf ("Error reading input\n" ); exit (EXIT_FAILURE); } input_buffer->input_length = bytes_read; input_buffer->buffer[bytes_read] = 0 ; } void close_input_buffer (InputBuffer* input_buffer) { free (input_buffer->buffer); free (input_buffer); printf ("关闭输入\n" ); } MetaCommandResult do_meta_command (InputBuffer* input_buffer) { if (strcmp (input_buffer->buffer, ".exit" ) == 0 ) { return META_COMMAND_EXIT; } else if (strcmp (input_buffer->buffer, ".q" ) == 0 ) { return META_COMMAND_EXIT; } else if (strcmp (input_buffer->buffer, ".v" ) == 0 ) { printf ("CDB version 0.1\n" ); return META_COMMAND_SUCCESS; } else { return META_COMMAND_UNRECOGNIZED_COMMAND; } } PrepareResult prepare_statement (InputBuffer* input_buffer, Statement* statement) { prepare_trim (input_buffer); if (strncmp (input_buffer->buffer, "insert" , 6 ) == 0 ) { return prepare_insert (input_buffer, statement); } if (strncmp (input_buffer->buffer, "select" , 6 ) == 0 ) { statement->type = STATEMENT_SELECT; return PREPARE_SUCCESS; } return PREPARE_UNRECOGNIZED_STATEMENT; } void prepare_trim (InputBuffer* input_buffer) { char * temp = (char *)malloc (strlen (input_buffer->buffer) * sizeof (char )); char * p1 = temp; char * p2 = input_buffer->buffer; while ((*p2) == ' ' ) { p2++; } bool flag = true ; while ((*p2) != 0 ) { if (((*p2) == ' ' ) && (flag == true )) { *p1 = ' ' ; p1++; p2++; flag = false ; } else if ((*p2) != ' ' ) { *p1 = *p2; p1++; p2++; flag = true ; } else { p2++; } } *p1 = 0 ; free (input_buffer->buffer); input_buffer->buffer = temp; } PrepareResult prepare_insert (InputBuffer* input_buffer, Statement* statement) { statement->type = STATEMENT_INSERT; char * keyword = strtok (input_buffer->buffer, " " ); char * id_string = strtok (NULL , " " ); char * username = strtok (NULL , " " ); char * email = strtok (NULL , " " ); char * test_parameter = strtok (NULL , "" ); if (id_string == NULL || username == NULL || email == NULL ) { return PREPARE_SYNTAX_ERROR; } if (test_parameter != NULL 
) { return PREPARE_TOO_MANY_PARAMETER; } int id = atoi (id_string); if (strlen (username) > COLUMN_USERNAME_SIZE) { return PREPARE_STRING_TOO_LONG; } if (strlen (email) > COLUMN_EMAIL_SIZE) { return PREPARE_STRING_TOO_LONG; } statement->row_to_insert.id = id; strcpy (statement->row_to_insert.username, username); strcpy (statement->row_to_insert.email, email); return PREPARE_SUCCESS; } ExecuteResult execute_statement (Statement* statement, Table* table) { switch (statement->type) { case (STATEMENT_INSERT): return execute_insert (statement, table); case (STATEMENT_SELECT): return execute_select (statement, table); } } ExecuteResult execute_insert (Statement* statement, Table* table) { if (table->num_rows >= TABLE_MAX_ROWS) { return EXECUTE_TABLE_FULL; } Row* row_to_insert = &(statement->row_to_insert); serialize_row (row_to_insert, row_slot (table, table->num_rows)); table->num_rows += 1 ; return EXECUTE_SUCCESS; } ExecuteResult execute_select (Statement* statement, Table* table) { Row row; for (uint32_t i = 0 ; i < table->num_rows; i++) { deserialize_row (row_slot (table, i), &row); print_row (&row); } return EXECUTE_SUCCESS; } void serialize_row (Row* source, Row* destination) { memcpy ((uint8_t *)destination + ID_OFFSET, &(source->id), ID_SIZE); memcpy ((uint8_t *)destination + USERNAME_OFFSET, &(source->username), USERNAME_SIZE); memcpy ((uint8_t *)destination + EMAIL_OFFSET, &(source->email), EMAIL_SIZE); } void deserialize_row (Row* source, Row* destination) { memcpy (&(destination->id), (uint8_t *)source + ID_OFFSET, ID_SIZE); memcpy (&(destination->username), (uint8_t *)source + USERNAME_OFFSET, USERNAME_SIZE); memcpy (&(destination->email), (uint8_t *)source + EMAIL_OFFSET, EMAIL_SIZE); } Row* row_slot (Table* table, uint32_t row_num) { uint32_t page_num = row_num / ROWS_PER_PAGE; void * page = table->pages[page_num]; if (page == NULL ) { page = table->pages[page_num] = malloc (PAGE_SIZE); } uint32_t row_offset = row_num % ROWS_PER_PAGE; uint32_t byte_offset 
= row_offset * ROW_SIZE; return (Row*)((uint8_t *)page + byte_offset); } Table* new_table () { Table* table = (Table*)malloc (sizeof (Table)); table->num_rows = 0 ; for (uint32_t i = 0 ; i < TABLE_MAX_PAGES; i++) { table->pages[i] = NULL ; } return table; } void free_table (Table* table) { for (int i = 0 ; table->pages[i]; i++) { free (table->pages[i]); } free (table); } void print_row (Row* row) { printf ("| id: %u username: %s email: %s |\n" , row->id, row->username, row->email); }
main.c 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 #include "CDB.h" int main () { Table* table = new_table (); InputBuffer* input_buffer = new_input_buffer (); while (true ) { print_prompt (); read_input (input_buffer); if (input_buffer->input_length == 0 ) { continue ; } else if (input_buffer->buffer[0 ] == '.' ) { switch (do_meta_command (input_buffer)) { case (META_COMMAND_EXIT): close_input_buffer (input_buffer); exit (EXIT_SUCCESS); break ; case (META_COMMAND_SUCCESS): break ; case (META_COMMAND_UNRECOGNIZED_COMMAND): printf ("Unrecognized command '%s'\n" , input_buffer->buffer); break ; } } else if (input_buffer->buffer[0 ] != '.' ) { Statement statement; switch (prepare_statement (input_buffer, &statement)) { case (PREPARE_SUCCESS): break ; case (PREPARE_STRING_TOO_LONG): printf ("String is too long.\n" ); continue ; case (PREPARE_NEGATIVE_ID): printf ("your id id negative.\n" ); continue ; case (PREPARE_TOO_MANY_PARAMETER): printf ("too many parameter.\n" ); continue ; case (PREPARE_UNRECOGNIZED_STATEMENT): printf ("Unrecognized keyword at start of '%s'.\n" , input_buffer->buffer); continue ; case (PREPARE_SYNTAX_ERROR): printf ("Syntax error. Could not parse statement.\n" ); continue ; } switch (execute_statement (&statement, table)) { case (EXECUTE_SUCCESS): printf ("Executed.\n" ); break ; case (EXECUTE_TABLE_FULL): printf ("Error: Table full.\n" ); break ; } } else { printf ("error\n" ); exit (EXIT_SUCCESS); } } }
第三步 保存到硬盘 这一步,我们要将内存中的数据库存入磁盘中。
数据定义 首先从程序入口处得知数据库的文件名,当前一个数据库只有一张表,一个文件。即这样:
/* Entry point fragment: the database file name is the first command-line argument. */
int main(int argc, char* argv[]) {
    if (argc <= 1) {
        printf("Must supply a database filename.\n");
        exit(EXIT_FAILURE);
    }
    char* filename = argv[1];
    /* The rest of main is shown in the complete main.c listing for this step. */
}
进行如下定义:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 typedef struct { int file_descriptor; uint32_t file_length; void * pages[TABLE_MAX_PAGES]; } Pager; typedef struct { uint32_t num_rows; Pager* pager; } Table;
一个表内包含一个pager用来管理数据库中的page,另外还需要存储数据库中的条目数量。一个pager就是一个文件的索引,负责存储对应的文件句柄,并且存储pages,一张表可以拥有很多个pager,一个pager存储一个文件,这样就可以让一张表使用多个文件来存储了。另外,这里pager用到了操作系统中缓存的概念,其中每一个page都指向内存中的一块区域,但是如果对应的page没有在内存中(NULL)就需要进行内存和硬盘的交换。
程序要连接数据库,因为目前数据库中只有一张表故连接这一张表就好。所以 db_open 自然就是 table_open .
table中的pager需要先进行初始化,然后利用pager中的file_length就可以得到table中的 num_rows.
对于pager,首先需要根据上层传来的filename打开文件,然后根据文件信息算出来 file_length, 由此得出 page的数量,再将 void* pages[TABLE_MAX_PAGES]; 全部指向NULL,因为当前内存中还没有任何数据库数据。
这样初始化的工作就完成了。
内存替换 这里实现了一个简单的抽象内存管理模块,入口:pager page_num 出口:在内存中找到或者开辟一块page区域,并以指针的形式返回。
假如要使用的页(根据页号来判断)不在内存中则发生缺页中断,这时在内存中开辟一个页空间,并且判断磁盘中是否有该页(根据该页号和磁盘中的页数量作比较),若有的话则读取磁盘内容填充该页的内容,若没有的话则什么都不做,直接返回这个空页即可。
这样就在内存中得到了想要的页。
同理,反过来在结束程序的时候还需要将内存中更新的页和磁盘中的相同步。
程序抽象 现在使用光标进行程序的抽象,即使用光标标识某一个行,使用一个函数获取该光标真实表示的行(在内存中),当然,若内存中本就不存在的话则使用上面提到的内存替换策略。
1 2 3 4 5 6 7 8 9 10 11 12 13 typedef struct { uint32_t num_rows; Pager* pager; } Table; typedef struct { Table* table; uint32_t row_num; bool end_of_table; } Cursor;
1 2 3 4 5 6 7 8 9 10 11 12 13 14 Row* cursor_value (Cursor* cursor) { uint32_t row_num = cursor->row_num; uint32_t page_num = row_num / ROWS_PER_PAGE; void * page = get_page (cursor->table->pager, page_num); uint32_t row_offset = row_num % ROWS_PER_PAGE; uint32_t byte_offset = row_offset * ROW_SIZE; return (Row*)((uint8_t *)page + byte_offset); }
/* Code listing: CDB.h -- shared declarations for step 3 (table persisted to a file). */
#ifndef CDB_H
#define CDB_H

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <stdint.h>
/* <io.h> exists only in the Microsoft CRT; it supplies _open/_read/_write/_lseek/_close. */
#ifdef _WIN32
#include <io.h>
#endif
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>

/* Maximum number of characters per text column, not counting the NUL. */
#define COLUMN_USERNAME_SIZE 32
#define COLUMN_EMAIL_SIZE 255
#define TABLE_MAX_PAGES 100
#define PAGE_SIZE 4096

/* Size in bytes of one member of a struct type, without needing an instance. */
#define size_of_attribute(Struct, Attribute) sizeof(((Struct*)0)->Attribute)

/* One line of user input read at the prompt. */
typedef struct {
    char* buffer;
    size_t buffer_length;
    size_t input_length;
} InputBuffer;

/* Outcome of handling a meta command (an input line that starts with '.'). */
typedef enum {
    META_COMMAND_EXIT,
    META_COMMAND_SUCCESS,
    META_COMMAND_UNRECOGNIZED_COMMAND
} MetaCommandResult;

/* Outcome of parsing an SQL-like input line. */
typedef enum {
    PREPARE_SUCCESS,
    PREPARE_NEGATIVE_ID,
    PREPARE_SYNTAX_ERROR,
    PREPARE_STRING_TOO_LONG,
    PREPARE_TOO_MANY_PARAMETER,
    PREPARE_UNRECOGNIZED_STATEMENT
} PrepareResult;

/* Outcome of executing a prepared statement. */
typedef enum {
    EXECUTE_SUCCESS,
    EXECUTE_TABLE_FULL
} ExecuteResult;

typedef enum {
    STATEMENT_INSERT,
    STATEMENT_SELECT
} StatementType;

/* One record; the "+ 1" leaves room for each string's terminating NUL. */
typedef struct {
    uint32_t id;
    char username[COLUMN_USERNAME_SIZE + 1];
    char email[COLUMN_EMAIL_SIZE + 1];
} Row;

typedef struct {
    StatementType type;
    Row row_to_insert;  /* only meaningful for STATEMENT_INSERT */
} Statement;

/*
 * Layout of one serialized row: id, username and email packed back to back.
 * The constants are `static` so that several source files can include this
 * header without defining the same external object twice. Each initializer
 * repeats the sizeof terms because in C one const object cannot initialize
 * another at file scope.
 */
static const uint32_t ID_SIZE = size_of_attribute(Row, id);
static const uint32_t USERNAME_SIZE = size_of_attribute(Row, username);
static const uint32_t EMAIL_SIZE = size_of_attribute(Row, email);
static const uint32_t ID_OFFSET = 0;
static const uint32_t USERNAME_OFFSET = 0 + size_of_attribute(Row, id);
static const uint32_t EMAIL_OFFSET =
    0 + size_of_attribute(Row, id) + size_of_attribute(Row, username);
static const uint32_t ROW_SIZE =
    size_of_attribute(Row, id) + size_of_attribute(Row, username) +
    size_of_attribute(Row, email);
/* Whole rows only: the bytes left over at the end of a page stay unused. */
static const uint32_t ROWS_PER_PAGE =
    PAGE_SIZE / (size_of_attribute(Row, id) + size_of_attribute(Row, username) +
                 size_of_attribute(Row, email));
static const uint32_t TABLE_MAX_ROWS =
    PAGE_SIZE / (size_of_attribute(Row, id) + size_of_attribute(Row, username) +
                 size_of_attribute(Row, email)) * TABLE_MAX_PAGES;

/* Page cache for one database file. */
typedef struct {
    int file_descriptor;           /* handle of the open database file */
    uint32_t file_length;          /* file size in bytes when it was opened */
    void* pages[TABLE_MAX_PAGES];  /* in-memory pages; NULL means not loaded */
} Pager;

/* The table: its row count plus the pager that holds its pages. */
typedef struct {
    uint32_t num_rows;
    Pager* pager;
} Table;

/* A position inside a table, identified by row number. */
typedef struct {
    Table* table;
    uint32_t row_num;
    bool end_of_table;  /* true when the cursor is one past the last row */
} Cursor;

/* Input handling */
InputBuffer* new_input_buffer(void);
void print_prompt(void);
/* NOTE: project-local helper; the name and signature differ from POSIX getline(). */
size_t getline(char** buffer, size_t* n);
void read_input(InputBuffer* input_buffer);
void close_input_buffer(InputBuffer* input_buffer);

/* Command parsing */
MetaCommandResult do_meta_command(InputBuffer* input_buffer, Table* table);
PrepareResult prepare_statement(InputBuffer* input_buffer, Statement* statement);
void prepare_trim(InputBuffer* input_buffer);
PrepareResult prepare_insert(InputBuffer* input_buffer, Statement* statement);

/* Execution */
ExecuteResult execute_statement(Statement* statement, Table* table);
ExecuteResult execute_insert(Statement* statement, Table* table);
ExecuteResult execute_select(Statement* statement, Table* table);

/* Row storage and paging */
void serialize_row(Row* source, Row* destination);
void deserialize_row(Row* source, Row* destination);
Row* cursor_value(Cursor* cursor);
void* get_page(Pager* pager, uint32_t page_num);
void pager_flush(Pager* pager, uint32_t page_num, uint32_t size);
Table* db_open(const char* filename);
Pager* pager_open(const char* filename);
void db_close(Table* table);
void free_table(Table* table);
void print_row(Row* row);

/* Cursors */
Cursor* table_start(Table* table);
Cursor* table_end(Table* table);
void cursor_advance(Cursor* cursor);

#endif /* CDB_H */
CDB.cpp 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 #define _CRT_SECURE_NO_WARNINGS #include "CDB.h" InputBuffer* new_input_buffer () { InputBuffer* input_buffer = (InputBuffer*)malloc (sizeof (InputBuffer)); if (input_buffer) { input_buffer->buffer = NULL ; input_buffer->buffer_length = 0 ; input_buffer->input_length = 0 
; return input_buffer; } exit (EXIT_FAILURE); } void print_prompt () { printf ("CDB >> " ); } size_t getline (char ** buffer, size_t * n) { char c = 0 ; size_t bytes_read = 0 ; uint32_t COLUMN_MAX_SIZE = COLUMN_USERNAME_SIZE + COLUMN_EMAIL_SIZE + 100 ; char * buffer_temp = (char *)malloc (COLUMN_MAX_SIZE * sizeof (char )); if (buffer_temp == NULL ) { exit (EXIT_FAILURE); } while ((c = getchar ()) && c != '\n' ) { *(buffer_temp + bytes_read++) = c; if (bytes_read == COLUMN_MAX_SIZE) { printf ("too long\n" ); exit (EXIT_FAILURE); } } *buffer = buffer_temp; return bytes_read; } void read_input (InputBuffer* input_buffer) { size_t bytes_read = getline (&(input_buffer->buffer), &(input_buffer->buffer_length)); if (bytes_read < 0 ) { printf ("Error reading input\n" ); exit (EXIT_FAILURE); } input_buffer->input_length = bytes_read; input_buffer->buffer[bytes_read] = 0 ; } void close_input_buffer (InputBuffer* input_buffer) { free (input_buffer->buffer); free (input_buffer); printf ("关闭输入\n" ); } MetaCommandResult do_meta_command (InputBuffer* input_buffer, Table* table) { if (strcmp (input_buffer->buffer, ".exit" ) == 0 ) { db_close (table); return META_COMMAND_EXIT; } else if (strcmp (input_buffer->buffer, ".q" ) == 0 ) { db_close (table); return META_COMMAND_EXIT; } else if (strcmp (input_buffer->buffer, ".v" ) == 0 ) { printf ("CDB version 0.1\n" ); return META_COMMAND_SUCCESS; } else { return META_COMMAND_UNRECOGNIZED_COMMAND; } } PrepareResult prepare_statement (InputBuffer* input_buffer, Statement* statement) { prepare_trim (input_buffer); if (strncmp (input_buffer->buffer, "insert" , 6 ) == 0 ) { return prepare_insert (input_buffer, statement); } if (strncmp (input_buffer->buffer, "select" , 6 ) == 0 ) { statement->type = STATEMENT_SELECT; return PREPARE_SUCCESS; } return PREPARE_UNRECOGNIZED_STATEMENT; } void prepare_trim (InputBuffer* input_buffer) { char * temp = (char *)malloc (strlen (input_buffer->buffer) * sizeof (char )); char * p1 = temp; char * p2 = 
input_buffer->buffer; while ((*p2) == ' ' ) { p2++; } bool flag = true ; while ((*p2) != 0 ) { if (((*p2) == ' ' ) && (flag == true )) { *p1 = ' ' ; p1++; p2++; flag = false ; } else if ((*p2) != ' ' ) { *p1 = *p2; p1++; p2++; flag = true ; } else { p2++; } } *p1 = 0 ; free (input_buffer->buffer); input_buffer->buffer = temp; } PrepareResult prepare_insert (InputBuffer* input_buffer, Statement* statement) { statement->type = STATEMENT_INSERT; char * keyword = strtok (input_buffer->buffer, " " ); char * id_string = strtok (NULL , " " ); char * username = strtok (NULL , " " ); char * email = strtok (NULL , " " ); char * test_parameter = strtok (NULL , "" ); if (id_string == NULL || username == NULL || email == NULL ) { return PREPARE_SYNTAX_ERROR; } if (test_parameter != NULL ) { return PREPARE_TOO_MANY_PARAMETER; } int id = atoi (id_string); if (strlen (username) > COLUMN_USERNAME_SIZE) { return PREPARE_STRING_TOO_LONG; } if (strlen (email) > COLUMN_EMAIL_SIZE) { return PREPARE_STRING_TOO_LONG; } statement->row_to_insert.id = id; strcpy (statement->row_to_insert.username, username); strcpy (statement->row_to_insert.email, email); return PREPARE_SUCCESS; } ExecuteResult execute_statement (Statement* statement, Table* table) { switch (statement->type) { case (STATEMENT_INSERT): return execute_insert (statement, table); case (STATEMENT_SELECT): return execute_select (statement, table); } } ExecuteResult execute_insert (Statement* statement, Table* table) { if (table->num_rows >= TABLE_MAX_ROWS) { return EXECUTE_TABLE_FULL; } Row* row_to_insert = &(statement->row_to_insert); Cursor* cursor = table_end (table); serialize_row (row_to_insert, cursor_value (cursor)); table->num_rows += 1 ; return EXECUTE_SUCCESS; } ExecuteResult execute_select (Statement* statement, Table* table) { Cursor* cursor = table_start (table); Row row; while (!(cursor->end_of_table)) { deserialize_row (cursor_value (cursor), &row); print_row (&row); cursor_advance (cursor); } free (cursor); return 
EXECUTE_SUCCESS; } void serialize_row (Row* source, Row* destination) { memcpy ((uint8_t *)destination + ID_OFFSET, &(source->id), ID_SIZE); memcpy ((uint8_t *)destination + USERNAME_OFFSET, &(source->username), USERNAME_SIZE); memcpy ((uint8_t *)destination + EMAIL_OFFSET, &(source->email), EMAIL_SIZE); } void deserialize_row (Row* source, Row* destination) { memcpy (&(destination->id), (uint8_t *)source + ID_OFFSET, ID_SIZE); memcpy (&(destination->username), (uint8_t *)source + USERNAME_OFFSET, USERNAME_SIZE); memcpy (&(destination->email), (uint8_t *)source + EMAIL_OFFSET, EMAIL_SIZE); } Row* cursor_value (Cursor* cursor) { uint32_t row_num = cursor->row_num; uint32_t page_num = row_num / ROWS_PER_PAGE; void * page = get_page (cursor->table->pager, page_num); uint32_t row_offset = row_num % ROWS_PER_PAGE; uint32_t byte_offset = row_offset * ROW_SIZE; return (Row*)((uint8_t *)page + byte_offset); } void * get_page (Pager* pager, uint32_t page_num) { if (page_num > TABLE_MAX_PAGES) { printf ("Tried to fetch page number out of bounds. 
%d > %d\n" , page_num, TABLE_MAX_PAGES); exit (EXIT_FAILURE); } else if (pager->pages[page_num] == NULL ) { void * page = malloc (PAGE_SIZE); if (!page) { printf ("load page error.\n" ); exit (EXIT_FAILURE); } uint32_t num_pages = pager->file_length / PAGE_SIZE; if (pager->file_length % PAGE_SIZE) { num_pages += 1 ; } if (page_num <= num_pages - 1 ) { _lseek(pager->file_descriptor, page_num * PAGE_SIZE, SEEK_SET); size_t bytes_read = _read(pager->file_descriptor, page, PAGE_SIZE); if (bytes_read == -1 ) { printf ("Error reading file: %d\n" , errno); exit (EXIT_FAILURE); } } pager->pages[page_num] = page; } return pager->pages[page_num]; } Table* db_open (const char * filename) { Pager* pager = pager_open (filename); uint32_t num_rows = pager->file_length / ROW_SIZE; Table* table = (Table*)malloc (sizeof (Table)); table->pager = pager; table->num_rows = num_rows; return table; } Pager* pager_open (const char * filename) { int fd = _open(filename, O_RDWR | O_CREAT, _S_IREAD | _S_IWRITE); if (fd == -1 ) { printf ("Unable to open file\n" ); exit (EXIT_FAILURE); } off_t file_length = _lseek(fd, 0 , SEEK_END); Pager* pager = (Pager*)malloc (sizeof (Pager)); pager->file_descriptor = fd; pager->file_length = file_length; for (uint32_t i = 0 ; i < TABLE_MAX_PAGES; i++) { pager->pages[i] = NULL ; } return pager; } void pager_flush (Pager* pager, uint32_t page_num, uint32_t size) { if (pager->pages[page_num] == NULL ) { printf ("Tried to flush null page\n" ); exit (EXIT_FAILURE); } off_t offset = _lseek(pager->file_descriptor, page_num * PAGE_SIZE, SEEK_SET); if (offset == -1 ) { printf ("Error seeking: %d\n" , errno); exit (EXIT_FAILURE); } size_t bytes_written = _write(pager->file_descriptor, pager->pages[page_num], size); if (bytes_written == -1 ) { printf ("Error writing: %d\n" , errno); exit (EXIT_FAILURE); } } void db_close (Table* table) { Pager* pager = table->pager; uint32_t num_full_pages = table->num_rows / ROWS_PER_PAGE; for (uint32_t i = 0 ; i < num_full_pages; 
i++) { if (pager->pages[i] == NULL ) { continue ; } pager_flush (pager, i, PAGE_SIZE); free (pager->pages[i]); pager->pages[i] = NULL ; } uint32_t num_additional_rows = table->num_rows % ROWS_PER_PAGE; if (num_additional_rows > 0 ) { uint32_t page_num = num_full_pages; if (pager->pages[page_num] != NULL ) { pager_flush (pager, page_num, num_additional_rows * ROW_SIZE); free (pager->pages[page_num]); pager->pages[page_num] = NULL ; } } int result = _close(pager->file_descriptor); if (result == -1 ) { printf ("Error closing db file.\n" ); exit (EXIT_FAILURE); } for (uint32_t i = 0 ; i < TABLE_MAX_PAGES; i++) { void * page = pager->pages[i]; if (page) { free (page); pager->pages[i] = NULL ; } } free (pager); free (table); } void free_table (Table* table) { for (int i = 0 ; table->pager->pages[i]; i++) { free (table->pager->pages[i]); } free (table->pager); free (table); } void print_row (Row* row) { printf ("| id: %u username: %s email: %s |\n" , row->id, row->username, row->email); } Cursor* table_start (Table* table) { Cursor* cursor = (Cursor*)malloc (sizeof (Cursor)); cursor->table = table; cursor->row_num = 0 ; cursor->end_of_table = (table->num_rows == 0 ); return cursor; } Cursor* table_end (Table* table) { Cursor* cursor = (Cursor*)malloc (sizeof (Cursor)); cursor->table = table; cursor->row_num = table->num_rows; cursor->end_of_table = true ; return cursor; } void cursor_advance (Cursor* cursor) { cursor->row_num += 1 ; if (cursor->row_num >= cursor->table->num_rows) { cursor->end_of_table = true ; } }
main.c 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 #include "CDB.h" int main (int argc, char * argv[]) { if (argc <= 1 ) { printf ("Must supply a database filename.\n" ); exit (EXIT_FAILURE); } char * filename = argv[1 ]; Table* table = db_open (filename); InputBuffer* input_buffer = new_input_buffer (); while (true ) { print_prompt (); read_input (input_buffer); if (input_buffer->input_length == 0 ) { continue ; } else if (input_buffer->buffer[0 ] == '.' ) { switch (do_meta_command (input_buffer, table)) { case (META_COMMAND_EXIT): close_input_buffer (input_buffer); exit (EXIT_SUCCESS); break ; case (META_COMMAND_SUCCESS): break ; case (META_COMMAND_UNRECOGNIZED_COMMAND): printf ("Unrecognized command '%s'\n" , input_buffer->buffer); break ; } } else if (input_buffer->buffer[0 ] != '.' ) { Statement statement; switch (prepare_statement (input_buffer, &statement)) { case (PREPARE_SUCCESS): break ; case (PREPARE_STRING_TOO_LONG): printf ("String is too long.\n" ); continue ; case (PREPARE_NEGATIVE_ID): printf ("your id id negative.\n" ); continue ; case (PREPARE_TOO_MANY_PARAMETER): printf ("too many parameter.\n" ); continue ; case (PREPARE_UNRECOGNIZED_STATEMENT): printf ("Unrecognized keyword at start of '%s'.\n" , input_buffer->buffer); continue ; case (PREPARE_SYNTAX_ERROR): printf ("Syntax error. Could not parse statement.\n" ); continue ; } switch (execute_statement (&statement, table)) { case (EXECUTE_SUCCESS): printf ("Executed.\n" ); break ; case (EXECUTE_TABLE_FULL): printf ("Error: Table full.\n" ); break ; } } else { printf ("error\n" ); exit (EXIT_SUCCESS); } } }
第四步 使用 B TREE