postgresql

# PostgreSQL 数据库架构设计规范 ## 1. 核心设计原则 ### 1.1 命名规范 ```sql -- 表名:复数形式,小写,下划线分隔 CREATE TABLE users ( id BIGSERIAL PRIMARY KEY, created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() ); -- 列名:小写,下划线分隔 ALTER TABLE users ADD COLUMN phone_number VARCHAR(20); -- 索引:idx_表名_列名 CREATE INDEX idx_users_email ON users(email); -- 约束:ck_表名_约束名 ALTER TABLE orders ADD CONSTRAINT ck_orders_amount CHECK (amount > 0); ``` ### 1.2 数据类型最佳实践 ```sql -- 标识符:使用BIGSERIAL/BIGINT CREATE TABLE products ( id BIGSERIAL PRIMARY KEY, -- 自增主键 uuid UUID DEFAULT gen_random_uuid() -- 唯一标识 ); -- 时间戳:TIMESTAMPTZ CREATE TABLE audit_logs ( event_time TIMESTAMPTZ NOT NULL DEFAULT NOW(), created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW() ); -- 货币:NUMERIC(19,4) CREATE TABLE transactions ( amount NUMERIC(19,4) NOT NULL, currency CHAR(3) NOT NULL ); -- JSON数据:JSONB CREATE TABLE user_profiles ( preferences JSONB NOT NULL DEFAULT '{}', metadata JSONB ); -- 枚举:CREATE TYPE CREATE TYPE order_status AS ENUM ( 'pending', 'processing', 'shipped', 'delivered', 'cancelled' ); ``` ## 2. 表结构设计 ### 2.1 基础表模板 ```sql CREATE TABLE example_table ( -- 主键 id BIGSERIAL PRIMARY KEY, -- 业务唯一键 business_key VARCHAR(100) UNIQUE NOT NULL, -- 时间戳 created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), deleted_at TIMESTAMPTZ, -- 版本控制 version INTEGER NOT NULL DEFAULT 1, -- 软删除标志 is_deleted BOOLEAN NOT NULL DEFAULT FALSE, -- 审计字段 created_by BIGINT REFERENCES users(id), updated_by BIGINT REFERENCES users(id), -- 检查约束 CONSTRAINT ck_example_status CHECK (status IN ('active', 'inactive', 'suspended')) ) -- 表空间和存储参数 TABLESPACE pg_default; -- 行级安全策略 ALTER TABLE example_table ENABLE ROW LEVEL SECURITY; ``` ### 2.2 分区表设计 ```sql -- 时间范围分区 CREATE TABLE sensor_data ( id BIGSERIAL, sensor_id INTEGER NOT NULL, reading NUMERIC(10,2), recorded_at TIMESTAMPTZ NOT NULL, PRIMARY KEY (id, recorded_at) ) PARTITION BY RANGE (recorded_at); -- 创建月度分区 CREATE TABLE sensor_data_2024_01 PARTITION OF sensor_data FOR VALUES FROM ('2024-01-01') TO ('2024-02-01'); -- 列表分区 CREATE TABLE orders ( id BIGSERIAL, region VARCHAR(20) NOT NULL, amount NUMERIC(19,4) ) PARTITION BY LIST (region); CREATE TABLE orders_north PARTITION OF orders FOR VALUES IN ('beijing', 'tianjin'); ``` ## 3. 索引策略 ### 3.1 基础索引 ```sql -- B-tree索引(默认) CREATE INDEX idx_orders_customer_id ON orders(customer_id); -- 多列索引(注意顺序) CREATE INDEX idx_orders_status_date ON orders(status, created_at DESC); -- 唯一索引 CREATE UNIQUE INDEX uidx_users_email ON users(email) WHERE deleted_at IS NULL; -- 部分索引(过滤无效数据) CREATE INDEX idx_active_products ON products(id) WHERE is_active = true; -- 表达式索引 CREATE INDEX idx_users_lower_email ON users(LOWER(email)); ``` ### 3.2 高级索引 ```sql -- GIN索引(JSONB、数组、全文搜索) CREATE INDEX idx_profiles_preferences ON user_profiles USING GIN (preferences); -- GiST索引(几何数据、范围查询) CREATE INDEX idx_locations_geo ON locations USING GIST (geography_point); -- BRIN索引(大表的时间序列) CREATE INDEX idx_sensor_data_time ON sensor_data USING BRIN (recorded_at); -- 覆盖索引(INCLUDE) CREATE INDEX idx_orders_covering ON orders(customer_id, created_at) INCLUDE (total_amount, status); ``` ## 4. 约束与完整性 ### 4.1 数据完整性约束 ```sql -- 外键约束(级联删除) ALTER TABLE order_items ADD CONSTRAINT fk_order_items_order_id FOREIGN KEY (order_id) REFERENCES orders(id) ON DELETE CASCADE ON UPDATE RESTRICT; -- 检查约束 ALTER TABLE employees ADD CONSTRAINT ck_employees_salary CHECK (salary >= 0 AND salary <= 1000000); -- 排除约束(防止时间重叠) ALTER TABLE room_bookings ADD CONSTRAINT ex_room_bookings_time EXCLUDE USING GIST ( room_id WITH =, tsrange(start_time, end_time) WITH && ); -- NOT NULL约束 ALTER TABLE products ALTER COLUMN sku SET NOT NULL; ``` ## 5. 性能优化模式 ### 5.1 查询优化 ```sql -- 物化视图(缓存复杂查询) CREATE MATERIALIZED VIEW monthly_sales AS SELECT DATE_TRUNC('month', order_date) AS month, SUM(total_amount) AS total_sales, COUNT(*) AS order_count FROM orders WHERE order_date >= '2024-01-01' GROUP BY DATE_TRUNC('month', order_date) WITH DATA; -- 定期刷新 REFRESH MATERIALIZED VIEW CONCURRENTLY monthly_sales; -- 公用表表达式(CTE) WITH recent_orders AS ( SELECT * FROM orders WHERE created_at > NOW() - INTERVAL '30 days' ) SELECT customer_id, COUNT(*) FROM recent_orders GROUP BY customer_id; ``` ### 5.2 连接优化 ```sql -- 使用LATERAL连接 SELECT u.*, latest_order.* FROM users u LEFT JOIN LATERAL ( SELECT * FROM orders WHERE user_id = u.id ORDER BY created_at DESC LIMIT 1 ) latest_order ON true; -- 分区裁剪 SET enable_partition_pruning = on; ``` ## 6. 高级特性 ### 6.1 JSONB操作 ```sql -- JSONB查询优化 CREATE INDEX idx_profiles_preferences_path ON user_profiles USING GIN ((preferences->'settings')); -- JSONB更新 UPDATE user_profiles SET preferences = jsonb_set( preferences, '{notifications,email}', 'true' ) WHERE id = 1; -- JSONB聚合 SELECT jsonb_object_agg(key, value) AS user_settings FROM ( SELECT key, value FROM user_profiles, jsonb_each(preferences->'settings') ) t; ``` ### 6.2 全文搜索 ```sql -- 创建全文搜索索引 ALTER TABLE articles ADD COLUMN search_vector tsvector GENERATED ALWAYS AS ( setweight(to_tsvector('english', COALESCE(title, '')), 'A') || setweight(to_tsvector('english', COALESCE(content, '')), 'B') ) STORED; CREATE INDEX idx_articles_search ON articles USING GIN (search_vector); -- 全文搜索查询 SELECT title, ts_rank(search_vector, query) AS rank FROM articles, plainto_tsquery('english', 'postgresql performance') query WHERE search_vector @@ query ORDER BY rank DESC; ``` ### 6.3 数组类型 ```sql -- 数组列 CREATE TABLE products ( tags TEXT[] NOT NULL DEFAULT '{}', category_ids INTEGER[] NOT NULL DEFAULT '{}' ); -- 数组索引 CREATE INDEX idx_products_tags ON products USING GIN (tags); -- 数组查询 SELECT * FROM products WHERE tags @> ARRAY['electronics', 'sale']; ``` ## 7. 安全与维护 ### 7.1 行级安全 ```sql -- 启用RLS ALTER TABLE user_data ENABLE ROW LEVEL SECURITY; -- 创建策略 CREATE POLICY user_data_policy ON user_data FOR ALL USING (user_id = current_user_id()) WITH CHECK (user_id = current_user_id()); ``` ### 7.2 审计追踪 ```sql -- 审计表 CREATE TABLE audit_trail ( id BIGSERIAL PRIMARY KEY, table_name TEXT NOT NULL, record_id BIGINT NOT NULL, operation CHAR(1) NOT NULL, old_data JSONB, new_data JSONB, changed_at TIMESTAMPTZ DEFAULT NOW(), changed_by BIGINT REFERENCES users(id) ); -- 审计触发器 CREATE OR REPLACE FUNCTION audit_trigger_func() RETURNS TRIGGER AS $$ BEGIN IF (TG_OP = 'DELETE') THEN INSERT INTO audit_trail (table_name, record_id, operation, old_data) VALUES (TG_TABLE_NAME, OLD.id, 'D', row_to_json(OLD)); RETURN OLD; ELSIF (TG_OP = 'UPDATE') THEN INSERT INTO audit_trail (table_name, record_id, operation, old_data, new_data) VALUES (TG_TABLE_NAME, NEW.id, 'U', row_to_json(OLD), row_to_json(NEW)); RETURN NEW; ELSIF (TG_OP = 'INSERT') THEN INSERT INTO audit_trail (table_name, record_id, operation, new_data) VALUES (TG_TABLE_NAME, NEW.id, 'I', row_to_json(NEW)); RETURN NEW; END IF; RETURN NULL; END; $$ LANGUAGE plpgsql; ``` ## 8. 监控与维护 ### 8.1 性能监控视图 ```sql -- 慢查询监控 CREATE VIEW slow_queries AS SELECT query, calls, total_time, mean_time, rows FROM pg_stat_statements WHERE mean_time > 100 -- 超过100ms ORDER BY mean_time DESC; -- 索引使用统计 CREATE VIEW index_usage AS SELECT schemaname, tablename, indexname, idx_scan, idx_tup_read, idx_tup_fetch FROM pg_stat_user_indexes; ``` ### 8.2 维护任务 ```sql -- 自动更新updated_at CREATE OR REPLACE FUNCTION update_updated_at_column() RETURNS TRIGGER AS $$ BEGIN NEW.updated_at = NOW(); RETURN NEW; END; $$ LANGUAGE plpgsql; -- 定期VACUUM和ANALYZE -- 在postgresql.conf中配置: -- autovacuum = on -- autovacuum_vacuum_scale_factor = 0.1 -- autovacuum_analyze_scale_factor = 0.05 ``` ## 9. 扩展推荐 ```sql -- 常用扩展 CREATE EXTENSION IF NOT EXISTS "uuid-ossp"; CREATE EXTENSION IF NOT EXISTS "pgcrypto"; -- 加密函数 CREATE EXTENSION IF NOT EXISTS "pg_stat_statements"; -- SQL统计 CREATE EXTENSION IF NOT EXISTS "btree_gin"; -- B-tree GIN支持 CREATE EXTENSION IF NOT EXISTS "postgis"; -- 地理空间数据 ``` ## 10. 部署建议 1. **连接池**: 使用PgBouncer或Pgpool-II 2. **复制**: 设置流复制(至少1主1备) 3. **备份**: - 物理备份: pg_basebackup - 逻辑备份: pg_dump + 时间点恢复(PITR) 4. **监控**: - pg_stat_* 系统视图 - pgBadger 日志分析 - Prometheus + Grafana 这个架构设计遵循PostgreSQL最佳实践,结合了性能优化、数据完整性和可维护性考虑,适用于生产环境部署。

查看详情
name:postgresqldescription:Design a PostgreSQL-specific schema. Covers best-practices, data types, indexing, constraints, performance patterns, and advanced features

PostgreSQL Table Design

Use this skill when

  • Designing a schema for PostgreSQL

  • Selecting data types and constraints

  • Planning indexes, partitions, or RLS policies

  • Reviewing tables for scale and maintainability
  • Do not use this skill when

  • You are targeting a non-PostgreSQL database

  • You only need query tuning without schema changes

  • You require a DB-agnostic modeling guide
  • Instructions

  • Capture entities, access patterns, and scale targets (rows, QPS, retention).

  • Choose data types and constraints that enforce invariants.

  • Add indexes for real query paths and validate with EXPLAIN.

  • Plan partitioning or RLS where required by scale or access control.

  • Review migration impact and apply changes safely.
  • Safety

  • Avoid destructive DDL on production without backups and a rollback plan.

  • Use migrations and staging validation before applying schema changes.
  • Core Rules

  • Define a PRIMARY KEY for reference tables (users, orders, etc.). Not always needed for time-series/event/log data. When used, prefer BIGINT GENERATED ALWAYS AS IDENTITY; use UUID only when global uniqueness/opacity is needed.

  • Normalize first (to 3NF) to eliminate data redundancy and update anomalies; denormalize only for measured, high-ROI reads where join performance is proven problematic. Premature denormalization creates maintenance burden.

  • Add NOT NULL everywhere it’s semantically required; use DEFAULTs for common values.

  • Create indexes for access paths you actually query: PK/unique (auto), FK columns (manual!), frequent filters/sorts, and join keys.

  • Prefer TIMESTAMPTZ for event time; NUMERIC for money; TEXT for strings; BIGINT for integer values, DOUBLE PRECISION for floats (or NUMERIC for exact decimal arithmetic).
  • PostgreSQL “Gotchas”

  • Identifiers: unquoted → lowercased. Avoid quoted/mixed-case names. Convention: use snake_case for table/column names.

  • Unique + NULLs: UNIQUE allows multiple NULLs. Use UNIQUE (...) NULLS NOT DISTINCT (PG15+) to restrict to one NULL.

  • FK indexes: PostgreSQL does not auto-index FK columns. Add them.

  • No silent coercions: length/precision overflows error out (no truncation). Example: inserting 999 into NUMERIC(2,0) fails with error, unlike some databases that silently truncate or round.

  • Sequences/identity have gaps (normal; don't "fix"). Rollbacks, crashes, and concurrent transactions create gaps in ID sequences (1, 2, 5, 6...). This is expected behavior—don't try to make IDs consecutive.

  • Heap storage: no clustered PK by default (unlike SQL Server/MySQL InnoDB); CLUSTER is one-off reorganization, not maintained on subsequent inserts. Row order on disk is insertion order unless explicitly clustered.

  • MVCC: updates/deletes leave dead tuples; vacuum handles them—design to avoid hot wide-row churn.
  • Data Types

  • IDs: BIGINT GENERATED ALWAYS AS IDENTITY preferred (GENERATED BY DEFAULT also fine); UUID when merging/federating/used in a distributed system or for opaque IDs. Generate with uuidv7() (preferred if using PG18+) or gen_random_uuid() (if using an older PG version).

  • Integers: prefer BIGINT unless storage space is critical; INTEGER for smaller ranges; avoid SMALLINT unless constrained.

  • Floats: prefer DOUBLE PRECISION over REAL unless storage space is critical. Use NUMERIC for exact decimal arithmetic.

  • Strings: prefer TEXT; if length limits needed, use CHECK (LENGTH(col) <= n) instead of VARCHAR(n); avoid CHAR(n). Use BYTEA for binary data. Large strings/binary (>2KB default threshold) automatically stored in TOAST with compression. TOAST storage: PLAIN (no TOAST), EXTENDED (compress + out-of-line), EXTERNAL (out-of-line, no compress), MAIN (compress, keep in-line if possible). Default EXTENDED usually optimal. Control with ALTER TABLE tbl ALTER COLUMN col SET STORAGE strategy and ALTER TABLE tbl SET (toast_tuple_target = 4096) for threshold. Case-insensitive: for locale/accent handling use non-deterministic collations; for plain ASCII use expression indexes on LOWER(col) (preferred unless column needs case-insensitive PK/FK/UNIQUE) or CITEXT.

  • Money: NUMERIC(p,s) (never float).

  • Time: TIMESTAMPTZ for timestamps; DATE for date-only; INTERVAL for durations. Avoid TIMESTAMP (without timezone). Use now() for transaction start time, clock_timestamp() for current wall-clock time.

  • Booleans: BOOLEAN with NOT NULL constraint unless tri-state values are required.

  • Enums: CREATE TYPE ... AS ENUM for small, stable sets (e.g. US states, days of week). For business-logic-driven and evolving values (e.g. order statuses) → use TEXT (or INT) + CHECK or lookup table.

  • Arrays: TEXT[], INTEGER[], etc. Use for ordered lists where you query elements. Index with GIN for containment (@>, <@) and overlap (&&) queries. Access: arr[1] (1-indexed), arr[1:3] (slicing). Good for tags, categories; avoid for relations—use junction tables instead. Literal syntax: '{val1,val2}' or ARRAY[val1,val2].

  • Range types: daterange, numrange, tstzrange for intervals. Support overlap (&&), containment (@>), operators. Index with GiST. Good for scheduling, versioning, numeric ranges. Pick a bounds scheme and use it consistently; prefer [) (inclusive/exclusive) by default.

  • Network types: INET for IP addresses, CIDR for network ranges, MACADDR for MAC addresses. Support network operators (<<, >>, &&).

  • Geometric types: POINT, LINE, POLYGON, CIRCLE for 2D spatial data. Index with GiST. Consider PostGIS for advanced spatial features.

  • Text search: TSVECTOR for full-text search documents, TSQUERY for search queries. Index tsvector with GIN. Always specify language: to_tsvector('english', col) and to_tsquery('english', 'query'). Never use single-argument versions. This applies to both index expressions and queries.

  • Domain types: CREATE DOMAIN email AS TEXT CHECK (VALUE ~ '^[^@]+@[^@]+$') for reusable custom types with validation. Enforces constraints across tables.

  • Composite types: CREATE TYPE address AS (street TEXT, city TEXT, zip TEXT) for structured data within columns. Access with (col).field syntax.

  • JSONB: preferred over JSON; index with GIN. Use only for optional/semi-structured attrs. ONLY use JSON if the original ordering of the contents MUST be preserved.

  • Vector types: vector type by pgvector for vector similarity search for embeddings.

  • Do not use the following data types


  • DO NOT use timestamp (without time zone); DO use timestamptz instead.

  • DO NOT use char(n) or varchar(n); DO use text instead.

  • DO NOT use money type; DO use numeric instead.

  • DO NOT use timetz type; DO use timestamptz instead.

  • DO NOT use timestamptz(0) or any other precision specification; DO use timestamptz instead

  • DO NOT use serial type; DO use generated always as identity instead.

  • Table Types

  • Regular: default; fully durable, logged.

  • TEMPORARY: session-scoped, auto-dropped, not logged. Faster for scratch work.

  • UNLOGGED: persistent but not crash-safe. Faster writes; good for caches/staging.
  • Row-Level Security

    Enable with ALTER TABLE tbl ENABLE ROW LEVEL SECURITY. Create policies: CREATE POLICY user_access ON orders FOR SELECT TO app_users USING (user_id = current_user_id()). Built-in user-based access control at the row level.

    Constraints

  • PK: implicit UNIQUE + NOT NULL; creates a B-tree index.

  • FK: specify ON DELETE/UPDATE action (CASCADE, RESTRICT, SET NULL, SET DEFAULT). Add explicit index on referencing column—speeds up joins and prevents locking issues on parent deletes/updates. Use DEFERRABLE INITIALLY DEFERRED for circular FK dependencies checked at transaction end.

  • UNIQUE: creates a B-tree index; allows multiple NULLs unless NULLS NOT DISTINCT (PG15+). Standard behavior: (1, NULL) and (1, NULL) are allowed. With NULLS NOT DISTINCT: only one (1, NULL) allowed. Prefer NULLS NOT DISTINCT unless you specifically need duplicate NULLs.

  • CHECK: row-local constraints; NULL values pass the check (three-valued logic). Example: CHECK (price > 0) allows NULL prices. Combine with NOT NULL to enforce: price NUMERIC NOT NULL CHECK (price > 0).

  • EXCLUDE: prevents overlapping values using operators. EXCLUDE USING gist (room_id WITH =, booking_period WITH &&) prevents double-booking rooms. Requires appropriate index type (often GiST).
  • Indexing

  • B-tree: default for equality/range queries (=, <, >, BETWEEN, ORDER BY)

  • Composite: order matters—index used if equality on leftmost prefix (WHERE a = ? AND b > ? uses index on (a,b), but WHERE b = ? does not). Put most selective/frequently filtered columns first.

  • Covering: CREATE INDEX ON tbl (id) INCLUDE (name, email) - includes non-key columns for index-only scans without visiting table.

  • Partial: for hot subsets (WHERE status = 'active'CREATE INDEX ON tbl (user_id) WHERE status = 'active'). Any query with status = 'active' can use this index.

  • Expression: for computed search keys (CREATE INDEX ON tbl (LOWER(email))). Expression must match exactly in WHERE clause: WHERE LOWER(email) = 'user@example.com'.

  • GIN: JSONB containment/existence, arrays (@>, ?), full-text search (@@)

  • GiST: ranges, geometry, exclusion constraints

  • BRIN: very large, naturally ordered data (time-series)—minimal storage overhead. Effective when row order on disk correlates with indexed column (insertion order or after CLUSTER).
  • Partitioning

  • Use for very large tables (>100M rows) where queries consistently filter on partition key (often time/date).

  • Alternate use: use for tables where data maintenance tasks dictates e.g. data pruned or bulk replaced periodically

  • RANGE: common for time-series (PARTITION BY RANGE (created_at)). Create partitions: CREATE TABLE logs_2024_01 PARTITION OF logs FOR VALUES FROM ('2024-01-01') TO ('2024-02-01'). TimescaleDB automates time-based or ID-based partitioning with retention policies and compression.

  • LIST: for discrete values (PARTITION BY LIST (region)). Example: FOR VALUES IN ('us-east', 'us-west').

  • HASH: for even distribution when no natural key (PARTITION BY HASH (user_id)). Creates N partitions with modulus.

  • Constraint exclusion: requires CHECK constraints on partitions for query planner to prune. Auto-created for declarative partitioning (PG10+).

  • Prefer declarative partitioning or hypertables. Do NOT use table inheritance.

  • Limitations: no global UNIQUE constraints—include partition key in PK/UNIQUE. FKs from partitioned tables not supported; use triggers.
  • Special Considerations

    Update-Heavy Tables

  • Separate hot/cold columns—put frequently updated columns in separate table to minimize bloat.

  • Use fillfactor=90 to leave space for HOT updates that avoid index maintenance.

  • Avoid updating indexed columns—prevents beneficial HOT updates.

  • Partition by update patterns—separate frequently updated rows in a different partition from stable data.
  • Insert-Heavy Workloads

  • Minimize indexes—only create what you query; every index slows inserts.

  • Use COPY or multi-row INSERT instead of single-row inserts.

  • UNLOGGED tables for rebuildable staging data—much faster writes.

  • Defer index creation for bulk loads—>drop index, load data, recreate indexes.

  • Partition by time/hash to distribute load. TimescaleDB automates partitioning and compression of insert-heavy data.

  • Use a natural key for primary key such as a (timestamp, device_id) if enforcing global uniqueness is important many insert-heavy tables don't need a primary key at all.

  • If you do need a surrogate key, Prefer BIGINT GENERATED ALWAYS AS IDENTITY over UUID.
  • Upsert-Friendly Design

  • Requires UNIQUE index on conflict target columns—ON CONFLICT (col1, col2) needs exact matching unique index (partial indexes don't work).

  • Use EXCLUDED.column to reference would-be-inserted values; only update columns that actually changed to reduce write overhead.

  • DO NOTHING faster than DO UPDATE when no actual update needed.
  • Safe Schema Evolution

  • Transactional DDL: most DDL operations can run in transactions and be rolled back—BEGIN; ALTER TABLE...; ROLLBACK; for safe testing.

  • Concurrent index creation: CREATE INDEX CONCURRENTLY avoids blocking writes but can't run in transactions.

  • Volatile defaults cause rewrites: adding NOT NULL columns with volatile defaults (e.g., now(), gen_random_uuid()) rewrites entire table. Non-volatile defaults are fast.

  • Drop constraints before columns: ALTER TABLE DROP CONSTRAINT then DROP COLUMN to avoid dependency issues.

  • Function signature changes: CREATE OR REPLACE with different arguments creates overloads, not replacements. DROP old version if no overload desired.
  • Generated Columns

  • ... GENERATED ALWAYS AS () STORED for computed, indexable fields. PG18+ adds VIRTUAL columns (computed on read, not stored).
  • Extensions

  • pgcrypto: crypt() for password hashing.

  • uuid-ossp: alternative UUID functions; prefer pgcrypto for new projects.

  • pg_trgm: fuzzy text search with % operator, similarity() function. Index with GIN for LIKE '%pattern%' acceleration.

  • citext: case-insensitive text type. Prefer expression indexes on LOWER(col) unless you need case-insensitive constraints.

  • btree_gin/btree_gist: enable mixed-type indexes (e.g., GIN index on both JSONB and text columns).

  • hstore: key-value pairs; mostly superseded by JSONB but useful for simple string mappings.

  • timescaledb: essential for time-series—automated partitioning, retention, compression, continuous aggregates.

  • postgis: comprehensive geospatial support beyond basic geometric types—essential for location-based applications.

  • pgvector: vector similarity search for embeddings.

  • pgaudit: audit logging for all database activity.
  • JSONB Guidance

  • Prefer JSONB with GIN index.

  • Default: CREATE INDEX ON tbl USING GIN (jsonb_col); → accelerates:

  • - Containment jsonb_col @> '{"k":"v"}'
    - Key existence jsonb_col ? 'k', any/all keys ?\|, ?&
    - Path containment on nested docs
    - Disjunction jsonb_col @> ANY(ARRAY['{"status":"active"}', '{"status":"pending"}'])
  • Heavy @> workloads: consider opclass jsonb_path_ops for smaller/faster containment-only indexes:

  • - CREATE INDEX ON tbl USING GIN (jsonb_col jsonb_path_ops);
    - Trade-off: loses support for key existence (?, ?|, ?&) queries—only supports containment (@>)
  • Equality/range on a specific scalar field: extract and index with B-tree (generated column or expression):

  • - ALTER TABLE tbl ADD COLUMN price INT GENERATED ALWAYS AS ((jsonb_col->>'price')::INT) STORED;
    - CREATE INDEX ON tbl (price);
    - Prefer queries like WHERE price BETWEEN 100 AND 500 (uses B-tree) over WHERE (jsonb_col->>'price')::INT BETWEEN 100 AND 500 without index.
  • Arrays inside JSONB: use GIN + @> for containment (e.g., tags). Consider jsonb_path_ops if only doing containment.

  • Keep core relations in tables; use JSONB for optional/variable attributes.

  • Use constraints to limit allowed JSONB values in a column e.g. config JSONB NOT NULL CHECK(jsonb_typeof(config) = 'object')

  • Examples

    Users

    CREATE TABLE users (
    user_id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY,
    email TEXT NOT NULL UNIQUE,
    name TEXT NOT NULL,
    created_at TIMESTAMPTZ NOT NULL DEFAULT now()
    );
    CREATE UNIQUE INDEX ON users (LOWER(email));
    CREATE INDEX ON users (created_at);

    Orders

    CREATE TABLE orders (
    order_id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY,
    user_id BIGINT NOT NULL REFERENCES users(user_id),
    status TEXT NOT NULL DEFAULT 'PENDING' CHECK (status IN ('PENDING','PAID','CANCELED')),
    total NUMERIC(10,2) NOT NULL CHECK (total > 0),
    created_at TIMESTAMPTZ NOT NULL DEFAULT now()
    );
    CREATE INDEX ON orders (user_id);
    CREATE INDEX ON orders (created_at);

    JSONB

    CREATE TABLE profiles (
    user_id BIGINT PRIMARY KEY REFERENCES users(user_id),
    attrs JSONB NOT NULL DEFAULT '{}',
    theme TEXT GENERATED ALWAYS AS (attrs->>'theme') STORED
    );
    CREATE INDEX profiles_attrs_gin ON profiles USING GIN (attrs);