萬盛學電腦網

 萬盛學電腦網 >> 數據庫 >> mysql教程 >> Mysql中如何查找並刪除重復數據

Mysql中如何查找並刪除重復數據

在數據庫中我們經常會碰到重復數據了,下面我來介紹利用相關的mysql語句實現查找重復記錄並且實現刪除重復記錄的sql語句。

考試系統中做了一個用戶導入試題功能,導致用戶導入了很多重復的試題,我需要查詢及刪除一下重復的記錄,於是有了這篇文章。

(一)單個字段

1、查找表中多余的重復記錄,根據(question_title)字段來判斷

 代碼如下 復制代碼

select * from questions where question_title in (select question_title from people group by question_title having count(question_title) > 1)

2、刪除表中多余的重復記錄,根據(question_title)字段來判斷,只留有一個記錄

 代碼如下 復制代碼

delete from questions
where peopleId in (select peopleId from people group by peopleId having count(question_title) > 1)
and min(id) not in (select question_id from questions group by question_title having count(question_title)>1)


(二)多個字段

刪除表中多余的重復記錄(多個字段),只留有rowid最小的記錄

 代碼如下 復制代碼

DELETE FROM questions WHERE (questions_title,questions_scope) IN (SELECT questions_title,questions_scope FROM questions GROUP BY questions_title,questions_scope HAVING COUNT(*) > 1) AND question_id NOT IN (SELECT MIN(question_id) FROM questions GROUP BY questions_scope,questions_title HAVING COUNT(*)>1)


用上述語句無法刪除,創建了臨時表才刪的,求各位達人解釋一下。

 

 代碼如下 復制代碼

CREATE TABLE tmp AS SELECT question_id FROM questions WHERE (questions_title,questions_scope) IN (SELECT questions_title,questions_scope FROM questions GROUP BY questions_title,questions_scope HAVING COUNT(*) > 1) AND question_id NOT IN (SELECT MIN(question_id) FROM questions GROUP BY questions_scope,questions_title HAVING COUNT(*)>1);

DELETE FROM questions WHERE question_id IN (SELECT question_id FROM tmp);

DROP TABLE tmp;

(三)

 代碼如下 復制代碼

declare @max integer,@id integer

declare cur_rows cursor local for select 主字段,count(*) from 表名 group by 主字段 having count(*) >; 1

open cur_rows

fetch cur_rows into @id,@max

while @@fetch_status=0

begin

select @max = @max -1

set rowcount @max

delete from 表名 where 主字段 = @id

fetch cur_rows into @id,@max

end

close cur_rows

set rowcount 0

上面講到了很多,下面我們一起來看實例刪除重復記錄實例

例1,表中有主鍵(可唯一標識的字段),且該字段為數字類型

 代碼如下 復制代碼


/* 表結構 */
DROP TABLE IF EXISTS `t1`;
CREATE TABLE IF NOT EXISTS `t1`(
  `id` INT(1) NOT NULL AUTO_INCREMENT,
  `name` VARCHAR(20) NOT NULL,
  `add` VARCHAR(20) NOT NULL,
  PRIMARY KEY(`id`)
)Engine=InnoDB;

/* 插入測試數據 */
INSERT INTO `t1`(`name`,`add`) VALUES
('abc',"123"),
('abc',"123"),
('abc',"321"),
('abc',"123"),
('xzy',"123"),
('xzy',"456"),
('xzy',"456"),
('xzy',"456"),
('xzy',"789"),
('xzy',"987"),
('xzy',"789"),
('ijk',"147"),
('ijk',"147"),
('ijk',"852"),
('opq',"852"),
('opq',"963"),
('opq',"741"),
('tpk',"741"),
('tpk',"963"),
('tpk',"963"),
('wer',"546"),
('wer',"546"),
('once',"546");

SELECT * FROM `t1`;
+----+------+-----+
| id | name | add |
+----+------+-----+
|  1 | abc  | 123 |
|  2 | abc  | 123 |
|  3 | abc  | 321 |
|  4 | abc  | 123 |
|  5 | xzy  | 123 |
|  6 | xzy  | 456 |
|  7 | xzy  | 456 |
|  8 | xzy  | 456 |
|  9 | xzy  | 789 |
| 10 | xzy  | 987 |
| 11 | xzy  | 789 |
| 12 | ijk  | 147 |
| 13 | ijk  | 147 |
| 14 | ijk  | 852 |
| 15 | opq  | 852 |
| 16 | opq  | 963 |
| 17 | opq  | 741 |
| 18 | tpk  | 741 |
| 19 | tpk  | 963 |
| 20 | tpk  | 963 |
| 21 | wer  | 546 |
| 22 | wer  | 546 |
| 23 | once | 546 |
+----+------+-----+
rows in set (0.00 sec)


查找id最小的重復數據(只查找id字段)

 代碼如下 復制代碼

/* 查找id最小的重復數據(只查找id字段) */
SELECT DISTINCT MIN(`id`) AS `id`
FROM `t1`
GROUP BY `name`,`add`
HAVING COUNT(1) > 1;
+------+
| id   |
+------+
|    1 |
|   12 |
|   19 |
|   21 |
|    6 |
|    9 |
+------+
rows in set (0.00 sec)

查找所有重復數據

 代碼如下 復制代碼

/* 查找所有重復數據 */
SELECT `t1`.*
FROM `t1`,(
  SELECT `name`,`add`
  FROM `t1`
  GROUP BY `name`,`add`
  HAVING COUNT(1) > 1
) AS `t2`
WHERE `t1`.`name` = `t2`.`name`
  AND `t1`.`add` = `t2`.`add`;
+----+------+-----+
| id | name | add |
+----+------+-----+
|  1 | abc  | 123 |
|  2 | abc  | 123 |
|  4 | abc  | 123 |
|  6 | xzy  | 456 |
|  7 | xzy  | 456 |
|  8 | xzy  | 456 |
|  9 | xzy  | 789 |
| 11 | xzy  | 789 |
| 12 | ijk  | 147 |
| 13 | ijk  | 147 |
| 19 | tpk  | 963 |
| 20 | tpk  | 963 |
| 21 | wer  | 546 |
| 22 | wer  | 546 |
+----+------+-----+
rows in set (0.00 sec)

查找除id最小的數據外的重復數據

 代碼如下 復制代碼

/* 查找除id最小的數據外的重復數據 */
SELECT `t1`.*
FROM `t1`,(
  SELECT DISTINCT MIN(`id`) AS `id`,`name`,`add`
  FROM `t1`
  GROUP BY `name`,`add`
  HAVING COUNT(1) > 1
) AS `t2`
WHERE `t1`.`name` = `t2`.`name`
  AND `t1`.`add` = `t2`.`add`
  AND `t1`.`id` <> `t2`.`id`;
+----+------+-----+
| id | name | add |
+----+------+-----+
|  2 | abc  | 123 |
|  4 | abc  | 123 |
|  7 | xzy  | 456 |
|  8 | xzy  | 456 |
| 11 | xzy  | 789 |
| 13 | ijk  | 147 |
| 20 | tpk  | 963 |
| 22 | wer  | 546 |
+----+------+-----+
rows in set (0.00 sec)

例2,表中沒有主鍵(可唯一標識的字段),或者主鍵並非數字類型(也可以刪除重復數據,但效率上肯定比較慢)


例2測試數據

 代碼如下 復制代碼

/* 表結構 */
DROP TABLE IF EXISTS `noid`;
CREATE TABLE IF NOT EXISTS `noid`(
  `pk` VARCHAR(20) NOT NULL COMMENT '字符串主鍵',
  `name` VARCHAR(20) NOT NULL,
  `add` VARCHAR(20) NOT NULL,
  PRIMARY KEY(`pk`)
)Engine=InnoDB;

/* 測試數據,與上例一樣的測試數據,只是主鍵變為字符串形式 */
INSERT INTO `noid`(`pk`,`name`,`add`) VALUES
('a','abc',"123"),
('b','abc',"123"),
('c','abc',"321"),
('d','abc',"123"),
('e','xzy',"123"),
('f','xzy',"456"),
('g','xzy',"456"),
('h','xzy',"456"),
('i','xzy',"789"),
('j','xzy',"987"),
('k','xzy',"789"),
('l','ijk',"147"),
('m','ijk',"147"),
('n','ijk',"852"),
('o','opq',"852"),
('p','opq',"963"),
('q','opq',"741"),
('r','tpk',"741"),
('s','tpk',"963"),
('t','tpk',"963"),
('u','wer',"546"),
('v','wer',"546"),
('w','once',"546");

SELECT * FROM `noid`;
+----+------+-----+
| pk | name | add |
+----+------+-----+
| a  | abc  | 123 |
| b  | abc  | 123 |
| c  | abc  | 321 |
| d  | abc  | 123 |
| e  | xzy  | 123 |
| f  | xzy  | 456 |
| g  | xzy  | 456 |
| h  | xzy  | 456 |
| i  | xzy  | 789 |
| j  | xzy  | 987 |
| k  | xzy  | 789 |
| l  | ijk  | 147 |
| m  | ijk  | 147 |
| n  | ijk  | 852 |
| o  | opq  | 852 |
| p  | opq  | 963 |
| q  | opq  | 741 |
| r  | tpk  | 741 |
| s  | tpk  | 963 |
| t  | tpk  | 963 |
| u  | wer  | 546 |
| v  | wer  | 546 |
| w  | once | 546 |
+----+------+-----+
rows in set (0.00 sec)

為表添加自增長的id字段

 代碼如下 復制代碼

/* 為表添加自增長的id字段 */
ALTER TABLE `noid` ADD `id` INT(1) NOT NULL AUTO_INCREMENT, ADD INDEX `id`(`id`);
Query OK, 23 rows affected (0.16 sec)
Records: 23  Duplicates: 0  Warnings: 0

SELECT * FROM `noid`;
+----+------+-----+----+
| pk | name | add | id |
+----+------+-----+----+
| a  | abc  | 123 |  1 |
| b  | abc  | 123 |  2 |
| c  | abc  | 321 |  3 |
| d  | abc  | 123 |  4 |
| e  | xzy  | 123 |  5 |
| f  | xzy  | 456 |  6 |
| g  | xzy  | 456 |  7 |
| h  | xzy  | 456 |  8 |
| i  | xzy  | 789 |  9 |
| j  | xzy  | 987 | 10 |
| k  | xzy  | 789 | 11 |
| l  | ijk  | 147 | 12 |
| m  | ijk  | 147 | 13 |
| n  | ijk  | 852 | 14 |
| o  | opq  | 852 | 15 |
| p  | opq  | 963 | 16 |
| q  | opq  | 741 | 17 |
| r  | tpk  | 741 | 18 |
| s  | tpk  | 963 | 19 |
| t  | tpk  | 963 | 20 |
| u  | wer  | 546 | 21 |
| v  | wer  | 546 | 22 |
| w  | once | 546 | 23 |
+----+------+-----+----+
rows in set (0.00 sec)

MySQL中必須是有索引的字段才可以使用AUTO_INCREMENT

刪除重復數據與上例一樣,記得刪除完數據把id字段也刪除了

刪除重復數據,只保留一條數據

 代碼如下 復制代碼

/* 刪除重復數據,只保留一條數據 */
DELETE FROM `noid`
USING `noid`,(
  SELECT DISTINCT MIN(`id`) AS `id`,`name`,`add`
  FROM `noid`
  GROUP BY `name`,`add`
  HAVING COUNT(1) > 1
) AS `t2`
WHERE `noid`.`name` = `t2`.`name`
  AND `noid`.`add` = `t2`.`add`
  AND `noid`.`id` <> `t2`.`id`;
Query OK, 8 rows affected (0.05 sec)

/* 刪除id字段 */
ALTER TABLE `noid` DROP `id`;
Query OK, 15 rows affected (0.16 sec)
Records: 15  Duplicates: 0  Warnings: 0

SELECT * FROM `noid`;
+----+------+-----+
| pk | name | add |
+----+------+-----+
| a  | abc  | 123 |
| c  | abc  | 321 |
| e  | xzy  | 123 |
| f  | xzy  | 456 |
| i  | xzy  | 789 |
| j  | xzy  | 987 |
| l  | ijk  | 147 |
| n  | ijk  | 852 |
| o  | opq  | 852 |
| p  | opq  | 963 |
| q  | opq  | 741 |
| r  | tpk  | 741 |
| s  | tpk  | 963 |
| u  | wer  | 546 |
| w  | once | 546 |
+----+------+-----+
rows in set (0.00 sec)

copyright © 萬盛學電腦網 all rights reserved