面試官:一千萬數(shù)據(jù),怎么快速查詢?
閱讀本文大概需要 5 分鐘。
來自:juejin.cn/post/6863668253898735629
前言
面試官:來說說,一千萬的數(shù)據(jù),你是怎么查詢的? B哥:直接分頁查詢,使用limit分頁。 面試官:有實操過嗎? B哥:肯定有呀
準(zhǔn)備數(shù)據(jù)
創(chuàng)建表
CREATE?TABLE?`user_operation_log`??(
??`id`?int(11)?NOT?NULL?AUTO_INCREMENT,
??`user_id`?varchar(64)?CHARACTER?SET?utf8mb4?COLLATE?utf8mb4_general_ci?NULL?DEFAULT?NULL,
??`ip`?varchar(20)?CHARACTER?SET?utf8mb4?COLLATE?utf8mb4_general_ci?NULL?DEFAULT?NULL,
??`op_data`?varchar(255)?CHARACTER?SET?utf8mb4?COLLATE?utf8mb4_general_ci?NULL?DEFAULT?NULL,
??`attr1`?varchar(255)?CHARACTER?SET?utf8mb4?COLLATE?utf8mb4_general_ci?NULL?DEFAULT?NULL,
??`attr2`?varchar(255)?CHARACTER?SET?utf8mb4?COLLATE?utf8mb4_general_ci?NULL?DEFAULT?NULL,
??`attr3`?varchar(255)?CHARACTER?SET?utf8mb4?COLLATE?utf8mb4_general_ci?NULL?DEFAULT?NULL,
??`attr4`?varchar(255)?CHARACTER?SET?utf8mb4?COLLATE?utf8mb4_general_ci?NULL?DEFAULT?NULL,
??`attr5`?varchar(255)?CHARACTER?SET?utf8mb4?COLLATE?utf8mb4_general_ci?NULL?DEFAULT?NULL,
??`attr6`?varchar(255)?CHARACTER?SET?utf8mb4?COLLATE?utf8mb4_general_ci?NULL?DEFAULT?NULL,
??`attr7`?varchar(255)?CHARACTER?SET?utf8mb4?COLLATE?utf8mb4_general_ci?NULL?DEFAULT?NULL,
??`attr8`?varchar(255)?CHARACTER?SET?utf8mb4?COLLATE?utf8mb4_general_ci?NULL?DEFAULT?NULL,
??`attr9`?varchar(255)?CHARACTER?SET?utf8mb4?COLLATE?utf8mb4_general_ci?NULL?DEFAULT?NULL,
??`attr10`?varchar(255)?CHARACTER?SET?utf8mb4?COLLATE?utf8mb4_general_ci?NULL?DEFAULT?NULL,
??`attr11`?varchar(255)?CHARACTER?SET?utf8mb4?COLLATE?utf8mb4_general_ci?NULL?DEFAULT?NULL,
??`attr12`?varchar(255)?CHARACTER?SET?utf8mb4?COLLATE?utf8mb4_general_ci?NULL?DEFAULT?NULL,
??PRIMARY?KEY?(`id`)?USING?BTREE
)?ENGINE?=?InnoDB?AUTO_INCREMENT?=?1?CHARACTER?SET?=?utf8mb4?COLLATE?=?utf8mb4_general_ci?ROW_FORMAT?=?Dynamic;
創(chuàng)建數(shù)據(jù)腳本
DELIMITER?;;
CREATE?PROCEDURE?batch_insert_log()
BEGIN
??DECLARE?i?INT?DEFAULT?1;
??DECLARE?userId?INT?DEFAULT?10000000;
?set?@execSql?=?'INSERT?INTO?`test`.`user_operation_log`(`user_id`,?`ip`,?`op_data`,?`attr1`,?`attr2`,?`attr3`,?`attr4`,?`attr5`,?`attr6`,?`attr7`,?`attr8`,?`attr9`,?`attr10`,?`attr11`,?`attr12`)?VALUES';
?set?@execData?=?'';
??WHILE?i<=10000000?DO
???set?@attr?=?"'測試很長很長很長很長很長很長很長很長很長很長很長很長很長很長很長很長很長的屬性'";
??set?@execData?=?concat(@execData,?"(",?userId?+?i,?",?'10.0.69.175',?'用戶登錄操作'",?",",?@attr,?",",?@attr,?",",?@attr,?",",?@attr,?",",?@attr,?",",?@attr,?",",?@attr,?",",?@attr,?",",?@attr,?",",?@attr,?",",?@attr,?",",?@attr,?")");
??if?i?%?1000?=?0
??then
?????set?@stmtSql?=?concat(@execSql,?@execData,";");
????prepare?stmt?from?@stmtSql;
????execute?stmt;
????DEALLOCATE?prepare?stmt;
????commit;
????set?@execData?=?"";
???else
?????set?@execData?=?concat(@execData,?",");
???end?if;
??SET?i=i+1;
??END?WHILE;
END;;
DELIMITER?;
SELECT?count(1)?FROM?`user_operation_log`
14060 ms 13755 ms 13447 ms
普通分頁查詢
SELECT?*?FROM?table?LIMIT?[offset,]?rows?|?rows?OFFSET?offset
第一個參數(shù)指定第一個返回記錄行的偏移量 第二個參數(shù)指定返回記錄行的最大數(shù)目
SELECT?*?FROM?`user_operation_log`?LIMIT?10000,?10
59 ms 49 ms 50 ms
相同偏移量,不同數(shù)據(jù)量
SELECT?*?FROM?`user_operation_log`?LIMIT?10000,?10
SELECT?*?FROM?`user_operation_log`?LIMIT?10000,?100
SELECT?*?FROM?`user_operation_log`?LIMIT?10000,?1000
SELECT?*?FROM?`user_operation_log`?LIMIT?10000,?10000
SELECT?*?FROM?`user_operation_log`?LIMIT?10000,?100000
SELECT?*?FROM?`user_operation_log`?LIMIT?10000,?1000000

相同數(shù)據(jù)量,不同偏移量
SELECT?*?FROM?`user_operation_log`?LIMIT?100,?100
SELECT?*?FROM?`user_operation_log`?LIMIT?1000,?100
SELECT?*?FROM?`user_operation_log`?LIMIT?10000,?100
SELECT?*?FROM?`user_operation_log`?LIMIT?100000,?100
SELECT?*?FROM?`user_operation_log`?LIMIT?1000000,?100

SELECT?*?FROM?`user_operation_log`?LIMIT?100,?100
SELECT?id,?attr?FROM?`user_operation_log`?LIMIT?100,?100
如何優(yōu)化
優(yōu)化偏移量大問題
采用子查詢方式
SELECT?*?FROM?`user_operation_log`?LIMIT?1000000,?10SELECT?id?FROM?`user_operation_log`?LIMIT?1000000,?1SELECT?*?FROM?`user_operation_log`?WHERE?id?>=?(SELECT?id?FROM?`user_operation_log`?LIMIT?1000000,?1)?LIMIT?10

第一條花費的時間最大,第三條比第一條稍微好點 子查詢使用索引速度更快
“ 注意:某些 mysql 版本不支持在 in 子句中使用 limit,所以采用了多個嵌套select
SELECT?*?FROM?`user_operation_log`?WHERE?id?IN?(SELECT?t.id?FROM?(SELECT?id?FROM?`user_operation_log`?LIMIT?1000000,?10)?AS?t)
采用 id 限定方式
SELECT?*?FROM?`user_operation_log`?WHERE?id?between?1000000?AND?1000100?LIMIT?100
SELECT?*?FROM?`user_operation_log`?WHERE?id?>=?1000000?LIMIT?100

“ 注意:這里的 LIMIT 是限制了條數(shù),沒有采用偏移量
優(yōu)化數(shù)據(jù)量大問題
SELECT?*?FROM?`user_operation_log`?LIMIT?1,?1000000
SELECT?id?FROM?`user_operation_log`?LIMIT?1,?1000000
SELECT?id,?user_id,?ip,?op_data,?attr1,?attr2,?attr3,?attr4,?attr5,?attr6,?attr7,?attr8,?attr9,?attr10,?attr11,?attr12?FROM?`user_operation_log`?LIMIT?1,?1000000

SELECT * 它不香嗎?
SELECT *。難道簡單無腦,它不香嗎?用 "SELECT * " 數(shù)據(jù)庫需要解析更多的對象、字段、權(quán)限、屬性等相關(guān)內(nèi)容,在 SQL 語句復(fù)雜,硬解析較多的情況下,會對數(shù)據(jù)庫造成沉重的負(fù)擔(dān)。 增大網(wǎng)絡(luò)開銷,* 有時會誤帶上如log、IconMD5之類的無用且大文本字段,數(shù)據(jù)傳輸size會幾何增漲。特別是MySQL和應(yīng)用程序不在同一臺機器,這種開銷非常明顯。
