方案 1:使用 DISTINCT ON(PostgreSQL 特有,推荐)
-- 只保留每个 id 的第一条
SELECT DISTINCT ON (id) * FROM t
ORDER BY id, ctid;
方案 2:使用 ROW_NUMBER(推荐)
-- 查看重复行
WITH ranked AS (
SELECT *,
ROW_NUMBER() OVER (PARTITION BY id ORDER BY id) AS rn
FROM t
)
SELECT * FROM ranked WHERE rn > 1;
-- 删除重复行
WITH ranked AS (
SELECT *,
ROW_NUMBER() OVER (PARTITION BY id ORDER BY id) AS rn
FROM t
)
DELETE FROM t
WHERE ctid IN (SELECT ctid FROM ranked WHERE rn > 1);
优点: SQL 标准,可读性强,更灵活
缺点: 需要配合 ctid 来实现删除
方案 3:使用 CTID
-- 查看重复行
SELECT * FROM t a
WHERE a.ctid <> (
SELECT MIN(b.ctid)
FROM t b
WHERE a.id = b.id
);
-- 删除重复行
DELETE FROM t
WHERE ctid <> (
SELECT MIN(b.ctid)
FROM t b
WHERE t.id = b.id
);
优点: 快速,直接操作物理位置
缺点: PostgreSQL 特有语法,不够优雅
方案 4:使用 NOT IN(简单但低效)
-- 保留 id 最小的一条
DELETE FROM t
WHERE id NOT IN (
SELECT MIN(id)
FROM t
GROUP BY id
);
缺点: 如果 id 相同但其他字段不同,逻辑可能有问题
