postgresql
# PostgreSQL 数据库架构设计规范 ## 1. 核心设计原则 ### 1.1 命名规范 ```sql -- 表名:复数形式,小写,下划线分隔 CREATE TABLE users ( id BIGSERIAL PRIMARY KEY, created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() ); -- 列名:小写,下划线分隔 ALTER TABLE users ADD COLUMN phone_number VARCHAR(20); -- 索引:idx_表名_列名 CREATE INDEX idx_users_email ON users(email); -- 约束:ck_表名_约束名 ALTER TABLE orders ADD CONSTRAINT ck_orders_amount CHECK (amount > 0); ``` ### 1.2 数据类型最佳实践 ```sql -- 标识符:使用BIGSERIAL/BIGINT CREATE TABLE products ( id BIGSERIAL PRIMARY KEY, -- 自增主键 uuid UUID DEFAULT gen_random_uuid() -- 唯一标识 ); -- 时间戳:TIMESTAMPTZ CREATE TABLE audit_logs ( event_time TIMESTAMPTZ NOT NULL DEFAULT NOW(), created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW() ); -- 货币:NUMERIC(19,4) CREATE TABLE transactions ( amount NUMERIC(19,4) NOT NULL, currency CHAR(3) NOT NULL ); -- JSON数据:JSONB CREATE TABLE user_profiles ( preferences JSONB NOT NULL DEFAULT '{}', metadata JSONB ); -- 枚举:CREATE TYPE CREATE TYPE order_status AS ENUM ( 'pending', 'processing', 'shipped', 'delivered', 'cancelled' ); ``` ## 2. 表结构设计 ### 2.1 基础表模板 ```sql CREATE TABLE example_table ( -- 主键 id BIGSERIAL PRIMARY KEY, -- 业务唯一键 business_key VARCHAR(100) UNIQUE NOT NULL, -- 时间戳 created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), deleted_at TIMESTAMPTZ, -- 版本控制 version INTEGER NOT NULL DEFAULT 1, -- 软删除标志 is_deleted BOOLEAN NOT NULL DEFAULT FALSE, -- 审计字段 created_by BIGINT REFERENCES users(id), updated_by BIGINT REFERENCES users(id), -- 检查约束 CONSTRAINT ck_example_status CHECK (status IN ('active', 'inactive', 'suspended')) ) -- 表空间和存储参数 TABLESPACE pg_default; -- 行级安全策略 ALTER TABLE example_table ENABLE ROW LEVEL SECURITY; ``` ### 2.2 分区表设计 ```sql -- 时间范围分区 CREATE TABLE sensor_data ( id BIGSERIAL, sensor_id INTEGER NOT NULL, reading NUMERIC(10,2), recorded_at TIMESTAMPTZ NOT NULL, PRIMARY KEY (id, recorded_at) ) PARTITION BY RANGE (recorded_at); -- 创建月度分区 CREATE TABLE sensor_data_2024_01 PARTITION OF sensor_data FOR VALUES FROM ('2024-01-01') TO ('2024-02-01'); -- 列表分区 CREATE TABLE orders ( id BIGSERIAL, region VARCHAR(20) NOT NULL, amount NUMERIC(19,4) ) PARTITION BY LIST (region); CREATE TABLE orders_north PARTITION OF orders FOR VALUES IN ('beijing', 'tianjin'); ``` ## 3. 索引策略 ### 3.1 基础索引 ```sql -- B-tree索引(默认) CREATE INDEX idx_orders_customer_id ON orders(customer_id); -- 多列索引(注意顺序) CREATE INDEX idx_orders_status_date ON orders(status, created_at DESC); -- 唯一索引 CREATE UNIQUE INDEX uidx_users_email ON users(email) WHERE deleted_at IS NULL; -- 部分索引(过滤无效数据) CREATE INDEX idx_active_products ON products(id) WHERE is_active = true; -- 表达式索引 CREATE INDEX idx_users_lower_email ON users(LOWER(email)); ``` ### 3.2 高级索引 ```sql -- GIN索引(JSONB、数组、全文搜索) CREATE INDEX idx_profiles_preferences ON user_profiles USING GIN (preferences); -- GiST索引(几何数据、范围查询) CREATE INDEX idx_locations_geo ON locations USING GIST (geography_point); -- BRIN索引(大表的时间序列) CREATE INDEX idx_sensor_data_time ON sensor_data USING BRIN (recorded_at); -- 覆盖索引(INCLUDE) CREATE INDEX idx_orders_covering ON orders(customer_id, created_at) INCLUDE (total_amount, status); ``` ## 4. 约束与完整性 ### 4.1 数据完整性约束 ```sql -- 外键约束(级联删除) ALTER TABLE order_items ADD CONSTRAINT fk_order_items_order_id FOREIGN KEY (order_id) REFERENCES orders(id) ON DELETE CASCADE ON UPDATE RESTRICT; -- 检查约束 ALTER TABLE employees ADD CONSTRAINT ck_employees_salary CHECK (salary >= 0 AND salary <= 1000000); -- 排除约束(防止时间重叠) ALTER TABLE room_bookings ADD CONSTRAINT ex_room_bookings_time EXCLUDE USING GIST ( room_id WITH =, tsrange(start_time, end_time) WITH && ); -- NOT NULL约束 ALTER TABLE products ALTER COLUMN sku SET NOT NULL; ``` ## 5. 性能优化模式 ### 5.1 查询优化 ```sql -- 物化视图(缓存复杂查询) CREATE MATERIALIZED VIEW monthly_sales AS SELECT DATE_TRUNC('month', order_date) AS month, SUM(total_amount) AS total_sales, COUNT(*) AS order_count FROM orders WHERE order_date >= '2024-01-01' GROUP BY DATE_TRUNC('month', order_date) WITH DATA; -- 定期刷新 REFRESH MATERIALIZED VIEW CONCURRENTLY monthly_sales; -- 公用表表达式(CTE) WITH recent_orders AS ( SELECT * FROM orders WHERE created_at > NOW() - INTERVAL '30 days' ) SELECT customer_id, COUNT(*) FROM recent_orders GROUP BY customer_id; ``` ### 5.2 连接优化 ```sql -- 使用LATERAL连接 SELECT u.*, latest_order.* FROM users u LEFT JOIN LATERAL ( SELECT * FROM orders WHERE user_id = u.id ORDER BY created_at DESC LIMIT 1 ) latest_order ON true; -- 分区裁剪 SET enable_partition_pruning = on; ``` ## 6. 高级特性 ### 6.1 JSONB操作 ```sql -- JSONB查询优化 CREATE INDEX idx_profiles_preferences_path ON user_profiles USING GIN ((preferences->'settings')); -- JSONB更新 UPDATE user_profiles SET preferences = jsonb_set( preferences, '{notifications,email}', 'true' ) WHERE id = 1; -- JSONB聚合 SELECT jsonb_object_agg(key, value) AS user_settings FROM ( SELECT key, value FROM user_profiles, jsonb_each(preferences->'settings') ) t; ``` ### 6.2 全文搜索 ```sql -- 创建全文搜索索引 ALTER TABLE articles ADD COLUMN search_vector tsvector GENERATED ALWAYS AS ( setweight(to_tsvector('english', COALESCE(title, '')), 'A') || setweight(to_tsvector('english', COALESCE(content, '')), 'B') ) STORED; CREATE INDEX idx_articles_search ON articles USING GIN (search_vector); -- 全文搜索查询 SELECT title, ts_rank(search_vector, query) AS rank FROM articles, plainto_tsquery('english', 'postgresql performance') query WHERE search_vector @@ query ORDER BY rank DESC; ``` ### 6.3 数组类型 ```sql -- 数组列 CREATE TABLE products ( tags TEXT[] NOT NULL DEFAULT '{}', category_ids INTEGER[] NOT NULL DEFAULT '{}' ); -- 数组索引 CREATE INDEX idx_products_tags ON products USING GIN (tags); -- 数组查询 SELECT * FROM products WHERE tags @> ARRAY['electronics', 'sale']; ``` ## 7. 安全与维护 ### 7.1 行级安全 ```sql -- 启用RLS ALTER TABLE user_data ENABLE ROW LEVEL SECURITY; -- 创建策略 CREATE POLICY user_data_policy ON user_data FOR ALL USING (user_id = current_user_id()) WITH CHECK (user_id = current_user_id()); ``` ### 7.2 审计追踪 ```sql -- 审计表 CREATE TABLE audit_trail ( id BIGSERIAL PRIMARY KEY, table_name TEXT NOT NULL, record_id BIGINT NOT NULL, operation CHAR(1) NOT NULL, old_data JSONB, new_data JSONB, changed_at TIMESTAMPTZ DEFAULT NOW(), changed_by BIGINT REFERENCES users(id) ); -- 审计触发器 CREATE OR REPLACE FUNCTION audit_trigger_func() RETURNS TRIGGER AS $$ BEGIN IF (TG_OP = 'DELETE') THEN INSERT INTO audit_trail (table_name, record_id, operation, old_data) VALUES (TG_TABLE_NAME, OLD.id, 'D', row_to_json(OLD)); RETURN OLD; ELSIF (TG_OP = 'UPDATE') THEN INSERT INTO audit_trail (table_name, record_id, operation, old_data, new_data) VALUES (TG_TABLE_NAME, NEW.id, 'U', row_to_json(OLD), row_to_json(NEW)); RETURN NEW; ELSIF (TG_OP = 'INSERT') THEN INSERT INTO audit_trail (table_name, record_id, operation, new_data) VALUES (TG_TABLE_NAME, NEW.id, 'I', row_to_json(NEW)); RETURN NEW; END IF; RETURN NULL; END; $$ LANGUAGE plpgsql; ``` ## 8. 监控与维护 ### 8.1 性能监控视图 ```sql -- 慢查询监控 CREATE VIEW slow_queries AS SELECT query, calls, total_time, mean_time, rows FROM pg_stat_statements WHERE mean_time > 100 -- 超过100ms ORDER BY mean_time DESC; -- 索引使用统计 CREATE VIEW index_usage AS SELECT schemaname, tablename, indexname, idx_scan, idx_tup_read, idx_tup_fetch FROM pg_stat_user_indexes; ``` ### 8.2 维护任务 ```sql -- 自动更新updated_at CREATE OR REPLACE FUNCTION update_updated_at_column() RETURNS TRIGGER AS $$ BEGIN NEW.updated_at = NOW(); RETURN NEW; END; $$ LANGUAGE plpgsql; -- 定期VACUUM和ANALYZE -- 在postgresql.conf中配置: -- autovacuum = on -- autovacuum_vacuum_scale_factor = 0.1 -- autovacuum_analyze_scale_factor = 0.05 ``` ## 9. 扩展推荐 ```sql -- 常用扩展 CREATE EXTENSION IF NOT EXISTS "uuid-ossp"; CREATE EXTENSION IF NOT EXISTS "pgcrypto"; -- 加密函数 CREATE EXTENSION IF NOT EXISTS "pg_stat_statements"; -- SQL统计 CREATE EXTENSION IF NOT EXISTS "btree_gin"; -- B-tree GIN支持 CREATE EXTENSION IF NOT EXISTS "postgis"; -- 地理空间数据 ``` ## 10. 部署建议 1. **连接池**: 使用PgBouncer或Pgpool-II 2. **复制**: 设置流复制(至少1主1备) 3. **备份**: - 物理备份: pg_basebackup - 逻辑备份: pg_dump + 时间点恢复(PITR) 4. **监控**: - pg_stat_* 系统视图 - pgBadger 日志分析 - Prometheus + Grafana 这个架构设计遵循PostgreSQL最佳实践,结合了性能优化、数据完整性和可维护性考虑,适用于生产环境部署。
PostgreSQL Table Design
Use this skill when
Do not use this skill when
Instructions
EXPLAIN.Safety
Core Rules
BIGINT GENERATED ALWAYS AS IDENTITY; use UUID only when global uniqueness/opacity is needed.NUMERIC for exact decimal arithmetic).PostgreSQL “Gotchas”
snake_case for table/column names.UNIQUE (...) NULLS NOT DISTINCT (PG15+) to restrict to one NULL.NUMERIC(2,0) fails with error, unlike some databases that silently truncate or round.CLUSTER is one-off reorganization, not maintained on subsequent inserts. Row order on disk is insertion order unless explicitly clustered.Data Types
BIGINT GENERATED ALWAYS AS IDENTITY preferred (GENERATED BY DEFAULT also fine); UUID when merging/federating/used in a distributed system or for opaque IDs. Generate with uuidv7() (preferred if using PG18+) or gen_random_uuid() (if using an older PG version).BIGINT unless storage space is critical; INTEGER for smaller ranges; avoid SMALLINT unless constrained.DOUBLE PRECISION over REAL unless storage space is critical. Use NUMERIC for exact decimal arithmetic.TEXT; if length limits needed, use CHECK (LENGTH(col) <= n) instead of VARCHAR(n); avoid CHAR(n). Use BYTEA for binary data. Large strings/binary (>2KB default threshold) automatically stored in TOAST with compression. TOAST storage: PLAIN (no TOAST), EXTENDED (compress + out-of-line), EXTERNAL (out-of-line, no compress), MAIN (compress, keep in-line if possible). Default EXTENDED usually optimal. Control with ALTER TABLE tbl ALTER COLUMN col SET STORAGE strategy and ALTER TABLE tbl SET (toast_tuple_target = 4096) for threshold. Case-insensitive: for locale/accent handling use non-deterministic collations; for plain ASCII use expression indexes on LOWER(col) (preferred unless column needs case-insensitive PK/FK/UNIQUE) or CITEXT.NUMERIC(p,s) (never float).TIMESTAMPTZ for timestamps; DATE for date-only; INTERVAL for durations. Avoid TIMESTAMP (without timezone). Use now() for transaction start time, clock_timestamp() for current wall-clock time.BOOLEAN with NOT NULL constraint unless tri-state values are required.CREATE TYPE ... AS ENUM for small, stable sets (e.g. US states, days of week). For business-logic-driven and evolving values (e.g. order statuses) → use TEXT (or INT) + CHECK or lookup table.TEXT[], INTEGER[], etc. Use for ordered lists where you query elements. Index with GIN for containment (@>, <@) and overlap (&&) queries. Access: arr[1] (1-indexed), arr[1:3] (slicing). Good for tags, categories; avoid for relations—use junction tables instead. Literal syntax: '{val1,val2}' or ARRAY[val1,val2].daterange, numrange, tstzrange for intervals. Support overlap (&&), containment (@>), operators. Index with GiST. Good for scheduling, versioning, numeric ranges. Pick a bounds scheme and use it consistently; prefer [) (inclusive/exclusive) by default.INET for IP addresses, CIDR for network ranges, MACADDR for MAC addresses. Support network operators (<<, >>, &&).POINT, LINE, POLYGON, CIRCLE for 2D spatial data. Index with GiST. Consider PostGIS for advanced spatial features.TSVECTOR for full-text search documents, TSQUERY for search queries. Index tsvector with GIN. Always specify language: to_tsvector('english', col) and to_tsquery('english', 'query'). Never use single-argument versions. This applies to both index expressions and queries.CREATE DOMAIN email AS TEXT CHECK (VALUE ~ '^[^@]+@[^@]+$') for reusable custom types with validation. Enforces constraints across tables.CREATE TYPE address AS (street TEXT, city TEXT, zip TEXT) for structured data within columns. Access with (col).field syntax.vector type by pgvector for vector similarity search for embeddings.Do not use the following data types
timestamp (without time zone); DO use timestamptz instead.char(n) or varchar(n); DO use text instead.money type; DO use numeric instead.timetz type; DO use timestamptz instead.timestamptz(0) or any other precision specification; DO use timestamptz insteadserial type; DO use generated always as identity instead.Table Types
Row-Level Security
Enable with ALTER TABLE tbl ENABLE ROW LEVEL SECURITY. Create policies: CREATE POLICY user_access ON orders FOR SELECT TO app_users USING (user_id = current_user_id()). Built-in user-based access control at the row level.
Constraints
ON DELETE/UPDATE action (CASCADE, RESTRICT, SET NULL, SET DEFAULT). Add explicit index on referencing column—speeds up joins and prevents locking issues on parent deletes/updates. Use DEFERRABLE INITIALLY DEFERRED for circular FK dependencies checked at transaction end.NULLS NOT DISTINCT (PG15+). Standard behavior: (1, NULL) and (1, NULL) are allowed. With NULLS NOT DISTINCT: only one (1, NULL) allowed. Prefer NULLS NOT DISTINCT unless you specifically need duplicate NULLs.CHECK (price > 0) allows NULL prices. Combine with NOT NULL to enforce: price NUMERIC NOT NULL CHECK (price > 0).EXCLUDE USING gist (room_id WITH =, booking_period WITH &&) prevents double-booking rooms. Requires appropriate index type (often GiST).Indexing
=, <, >, BETWEEN, ORDER BY)WHERE a = ? AND b > ? uses index on (a,b), but WHERE b = ? does not). Put most selective/frequently filtered columns first.CREATE INDEX ON tbl (id) INCLUDE (name, email) - includes non-key columns for index-only scans without visiting table.WHERE status = 'active' → CREATE INDEX ON tbl (user_id) WHERE status = 'active'). Any query with status = 'active' can use this index.CREATE INDEX ON tbl (LOWER(email))). Expression must match exactly in WHERE clause: WHERE LOWER(email) = 'user@example.com'.@>, ?), full-text search (@@)CLUSTER).Partitioning
PARTITION BY RANGE (created_at)). Create partitions: CREATE TABLE logs_2024_01 PARTITION OF logs FOR VALUES FROM ('2024-01-01') TO ('2024-02-01'). TimescaleDB automates time-based or ID-based partitioning with retention policies and compression.PARTITION BY LIST (region)). Example: FOR VALUES IN ('us-east', 'us-west').PARTITION BY HASH (user_id)). Creates N partitions with modulus.CHECK constraints on partitions for query planner to prune. Auto-created for declarative partitioning (PG10+).Special Considerations
Update-Heavy Tables
fillfactor=90 to leave space for HOT updates that avoid index maintenance.Insert-Heavy Workloads
COPY or multi-row INSERT instead of single-row inserts.BIGINT GENERATED ALWAYS AS IDENTITY over UUID.Upsert-Friendly Design
ON CONFLICT (col1, col2) needs exact matching unique index (partial indexes don't work).EXCLUDED.column to reference would-be-inserted values; only update columns that actually changed to reduce write overhead.DO NOTHING faster than DO UPDATE when no actual update needed.Safe Schema Evolution
BEGIN; ALTER TABLE...; ROLLBACK; for safe testing.CREATE INDEX CONCURRENTLY avoids blocking writes but can't run in transactions.NOT NULL columns with volatile defaults (e.g., now(), gen_random_uuid()) rewrites entire table. Non-volatile defaults are fast.ALTER TABLE DROP CONSTRAINT then DROP COLUMN to avoid dependency issues.CREATE OR REPLACE with different arguments creates overloads, not replacements. DROP old version if no overload desired.Generated Columns
... GENERATED ALWAYS AS () STORED for computed, indexable fields. PG18+ adds VIRTUAL columns (computed on read, not stored).Extensions
pgcrypto: crypt() for password hashing.uuid-ossp: alternative UUID functions; prefer pgcrypto for new projects.pg_trgm: fuzzy text search with % operator, similarity() function. Index with GIN for LIKE '%pattern%' acceleration.citext: case-insensitive text type. Prefer expression indexes on LOWER(col) unless you need case-insensitive constraints.btree_gin/btree_gist: enable mixed-type indexes (e.g., GIN index on both JSONB and text columns).hstore: key-value pairs; mostly superseded by JSONB but useful for simple string mappings.timescaledb: essential for time-series—automated partitioning, retention, compression, continuous aggregates.postgis: comprehensive geospatial support beyond basic geometric types—essential for location-based applications.pgvector: vector similarity search for embeddings.pgaudit: audit logging for all database activity.JSONB Guidance
JSONB with GIN index.CREATE INDEX ON tbl USING GIN (jsonb_col); → accelerates:- Containment
jsonb_col @> '{"k":"v"}'- Key existence
jsonb_col ? 'k', any/all keys ?\|, ?&- Path containment on nested docs
- Disjunction
jsonb_col @> ANY(ARRAY['{"status":"active"}', '{"status":"pending"}'])@> workloads: consider opclass jsonb_path_ops for smaller/faster containment-only indexes:-
CREATE INDEX ON tbl USING GIN (jsonb_col jsonb_path_ops);- Trade-off: loses support for key existence (
?, ?|, ?&) queries—only supports containment (@>)-
ALTER TABLE tbl ADD COLUMN price INT GENERATED ALWAYS AS ((jsonb_col->>'price')::INT) STORED;-
CREATE INDEX ON tbl (price);- Prefer queries like
WHERE price BETWEEN 100 AND 500 (uses B-tree) over WHERE (jsonb_col->>'price')::INT BETWEEN 100 AND 500 without index.@> for containment (e.g., tags). Consider jsonb_path_ops if only doing containment.config JSONB NOT NULL CHECK(jsonb_typeof(config) = 'object')Examples
Users
CREATE TABLE users (
user_id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY,
email TEXT NOT NULL UNIQUE,
name TEXT NOT NULL,
created_at TIMESTAMPTZ NOT NULL DEFAULT now()
);
CREATE UNIQUE INDEX ON users (LOWER(email));
CREATE INDEX ON users (created_at);Orders
CREATE TABLE orders (
order_id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY,
user_id BIGINT NOT NULL REFERENCES users(user_id),
status TEXT NOT NULL DEFAULT 'PENDING' CHECK (status IN ('PENDING','PAID','CANCELED')),
total NUMERIC(10,2) NOT NULL CHECK (total > 0),
created_at TIMESTAMPTZ NOT NULL DEFAULT now()
);
CREATE INDEX ON orders (user_id);
CREATE INDEX ON orders (created_at);JSONB
CREATE TABLE profiles (
user_id BIGINT PRIMARY KEY REFERENCES users(user_id),
attrs JSONB NOT NULL DEFAULT '{}',
theme TEXT GENERATED ALWAYS AS (attrs->>'theme') STORED
);
CREATE INDEX profiles_attrs_gin ON profiles USING GIN (attrs);