PostgreSQL vs MySQL | Deep Comparison | When to Use Each

PostgreSQL vs MySQL | Deep Comparison | When to Use Each

이 글의 핵심

PostgreSQL and MySQL are both excellent relational databases, but they excel in different scenarios. This guide covers concrete differences in types, JSON, full-text search, locking, and replication — so you can make the right choice for your project.

Quick Decision Matrix

Use PostgreSQL when:
  ✅ Complex queries with JOINs and aggregations
  ✅ JSON/JSONB document storage alongside relational data
  ✅ Full-text search without external engine
  ✅ Custom data types, enums, arrays
  ✅ Geospatial data (PostGIS extension)
  ✅ Strict SQL standard compliance matters
  ✅ Strong ACID requirements with complex transactions

Use MySQL when:
  ✅ Simple read-heavy workloads
  ✅ Existing MySQL infrastructure
  ✅ PlanetScale (MySQL-compatible serverless DB)
  ✅ WordPress, Drupal, or other MySQL-native CMS
  ✅ Team expertise is MySQL-based
  ✅ Simpler replication/cluster setup is a priority

Data Types Comparison

-- Auto-increment primary keys
-- MySQL
CREATE TABLE users (
  id INT AUTO_INCREMENT PRIMARY KEY,
  ...
);

-- PostgreSQL (modern way — SQL-standard IDENTITY, PostgreSQL 10+)
-- GENERATED ALWAYS rejects explicit inserts into id;
-- use GENERATED BY DEFAULT if you need to supply ids manually (e.g. data migration)
CREATE TABLE users (
  id INT GENERATED ALWAYS AS IDENTITY PRIMARY KEY,
  ...
);

-- PostgreSQL (traditional, still works)
CREATE TABLE users (
  id SERIAL PRIMARY KEY,  -- shorthand for INTEGER + sequence
  ...
);
-- String types
-- MySQL
VARCHAR(255)    -- variable length, up to 255 chars
TEXT            -- up to 65KB
MEDIUMTEXT      -- up to 16MB
LONGTEXT        -- up to 4GB

-- PostgreSQL
VARCHAR(255)    -- variable length (rarely needed — use TEXT)
TEXT            -- unlimited length (no performance difference from VARCHAR)
-- PostgreSQL has NO size limit on TEXT — use it everywhere

-- PostgreSQL extra types MySQL lacks:
CITEXT          -- case-insensitive text (extension)
UUID            -- native UUID type (MySQL: use CHAR(36) or BINARY(16))
INET            -- IP address
MACADDR         -- MAC address
MONEY           -- monetary amounts (locale-dependent output; NUMERIC is usually safer)
BYTEA           -- binary data (MySQL: BLOB)
-- Temporal types
-- MySQL
DATETIME        -- YYYY-MM-DD HH:MM:SS (no timezone)
TIMESTAMP       -- stored as UTC, displayed in server timezone
DATE, TIME

-- PostgreSQL
TIMESTAMP WITH TIME ZONE (TIMESTAMPTZ)  -- stores UTC, shows with offset
TIMESTAMP WITHOUT TIME ZONE             -- no timezone info
DATE, TIME, INTERVAL                    -- interval: '2 hours 30 minutes'

-- PostgreSQL is stricter: use TIMESTAMPTZ for any production timestamp
-- MySQL TIMESTAMP has 2038 problem (32-bit Unix timestamp)
-- (MySQL DATETIME is not affected — its range extends to year 9999)

JSON Support

PostgreSQL’s JSONB is a significant advantage for mixed relational/document storage.

-- PostgreSQL JSONB
CREATE TABLE products (
  id SERIAL PRIMARY KEY,
  name TEXT NOT NULL,
  attributes JSONB            -- Binary JSON — indexed, queryable
);

-- Insert (input is validated; malformed JSON is rejected at write time)
INSERT INTO products (name, attributes) VALUES
  ('Laptop', '{"brand": "Dell", "specs": {"ram": 16, "storage": 512}, "tags": ["gaming", "work"]}');

-- Query JSONB fields
-- ->  returns JSONB, ->> returns TEXT
SELECT name, attributes->>'brand' AS brand
FROM products
WHERE attributes->>'brand' = 'Dell';

-- Deep path access (->> yields text, so cast before numeric comparison)
SELECT name, attributes->'specs'->>'ram' AS ram_gb
FROM products
WHERE (attributes->'specs'->>'ram')::int > 8;

-- Array containment
SELECT * FROM products
WHERE attributes->'tags' ? 'gaming';      -- contains key/element

SELECT * FROM products
WHERE attributes @> '{"specs": {"ram": 16}}';  -- contains subset

-- Index on JSONB — critical for performance
-- GIN accelerates the containment/existence operators (@>, ?, ?|, ?&)
CREATE INDEX idx_products_brand ON products USING GIN (attributes);
-- Or for specific field (expression B-tree — supports = and range comparisons):
CREATE INDEX idx_products_brand_btree ON products ((attributes->>'brand'));

-- Update specific field (without replacing entire JSON)
-- jsonb_set(target, path, new_value): path is a text array, new value must be valid JSON
UPDATE products
SET attributes = jsonb_set(attributes, '{specs, ram}', '32')
WHERE id = 1;
-- MySQL JSON (added in 5.7, improved in 8.0)
-- Stored in an internal binary format (not plain text), but there is no
-- JSONB-style GIN indexing — only functional indexes on extracted paths.
CREATE TABLE products (
  id INT AUTO_INCREMENT PRIMARY KEY,
  name VARCHAR(255),
  attributes JSON
);

-- Query (JSON_EXTRACT returns a JSON value, hence the quoted '"Dell"')
SELECT name, JSON_EXTRACT(attributes, '$.brand') AS brand
FROM products
WHERE JSON_EXTRACT(attributes, '$.brand') = '"Dell"';

-- Shorthand (->> also unquotes the extracted value)
SELECT name, attributes->>'$.brand' AS brand
FROM products;

-- MySQL lacks JSONB-style GIN indexes — only functional indexes on specific paths.
-- NOTE: ->> returns LONGTEXT, which cannot be a functional key part directly;
-- CAST to an indexable type (utf8mb4_bin matches JSON_UNQUOTE's collation):
CREATE INDEX idx_brand ON products ((CAST(attributes->>'$.brand' AS CHAR(64)) COLLATE utf8mb4_bin));

Winner: PostgreSQL — JSONB is stored binary (faster reads), supports GIN indexes (fast containment queries), and has richer operators.

Full-Text Search
-- PostgreSQL built-in FTS
-- search_vector is a generated column: recomputed automatically on INSERT/UPDATE,
-- with title weighted higher ('A') than body ('B') for ranking
CREATE TABLE articles (
  id SERIAL PRIMARY KEY,
  title TEXT,
  body TEXT,
  search_vector TSVECTOR GENERATED ALWAYS AS (
    setweight(to_tsvector('english', COALESCE(title, '')), 'A') ||
    setweight(to_tsvector('english', COALESCE(body, '')), 'B')
  ) STORED
);

CREATE INDEX idx_articles_search ON articles USING GIN (search_vector);

-- Search (& = AND, | = OR in tsquery syntax)
SELECT id, title,
  ts_rank(search_vector, query) AS rank
FROM articles,
  to_tsquery('english', 'javascript & (tutorial | guide)') AS query
WHERE search_vector @@ query
ORDER BY rank DESC;

-- Highlight matching terms
-- Always pass the text search config explicitly so query parsing matches
-- the config used to build the index (omitting it falls back to
-- default_text_search_config, which may differ per database)
SELECT title,
  ts_headline('english', body, to_tsquery('english', 'javascript'), 'MaxWords=20') AS excerpt
FROM articles
WHERE search_vector @@ to_tsquery('english', 'javascript');
-- MySQL FULLTEXT (simpler, less powerful)
CREATE TABLE articles (
  id INT AUTO_INCREMENT PRIMARY KEY,
  title VARCHAR(255),
  body TEXT,
  FULLTEXT INDEX idx_search (title, body)
);

-- Boolean mode search (+word = must contain, -word = must not contain)
SELECT id, title,
  MATCH(title, body) AGAINST('javascript tutorial' IN BOOLEAN MODE) AS score
FROM articles
WHERE MATCH(title, body) AGAINST('+javascript +tutorial' IN BOOLEAN MODE)
ORDER BY score DESC;

-- MySQL FULLTEXT limitations:
-- Minimum word length (default 3 chars for InnoDB, 4 for MyISAM)
-- Limited proximity search (only InnoDB boolean-mode "phrase" and @distance)
-- English-only stemming
-- No custom ranking control

Winner: PostgreSQL — More powerful with configurable dictionaries, proximity search, rich ranking, and the ability to index JSONB alongside text.


Transactions and Locking

-- PostgreSQL: MVCC (Multi-Version Concurrency Control)
-- Readers never block writers, writers never block readers

-- Isolation levels
BEGIN TRANSACTION ISOLATION LEVEL READ COMMITTED;  -- default
BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ;
BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE;    -- strictest

-- PostgreSQL serializable actually detects and prevents anomalies
-- (Serializable Snapshot Isolation — conflicting transactions abort with a
--  serialization_failure error that the application should retry)
-- (MySQL serializable uses locks, not true serializability)

-- Explicit row locking
SELECT * FROM orders WHERE id = 1 FOR UPDATE;           -- lock for update
SELECT * FROM orders WHERE id = 1 FOR UPDATE SKIP LOCKED; -- skip locked rows (queue pattern)
SELECT * FROM orders WHERE id = 1 FOR SHARE;            -- shared lock
-- MySQL InnoDB also uses MVCC
-- But has some differences:

-- Phantom reads in REPEATABLE READ:
-- MySQL prevents them with gap locks (can cause deadlocks)
-- PostgreSQL prevents them with MVCC snapshot (no gap locks)

-- Table locking:
-- MySQL MyISAM: table-level locks (avoid for writes)
-- MySQL InnoDB: row-level locks (same as PostgreSQL)

Window Functions and CTEs

Both databases support window functions and CTEs (MySQL since 8.0), but PostgreSQL adds extras such as FILTER and LATERAL.

-- Window functions (both support)
-- NOTE: avoid "rank" as a column alias — RANK is a reserved word in MySQL 8.0
-- (it happens to work unquoted in PostgreSQL, but a neutral alias keeps the
--  example valid on both databases, as this section claims)
SELECT
  name,
  department,
  salary,
  AVG(salary) OVER (PARTITION BY department) AS dept_avg,
  RANK() OVER (PARTITION BY department ORDER BY salary DESC) AS salary_rank,
  LAG(salary, 1) OVER (ORDER BY hire_date) AS prev_salary
FROM employees;

-- CTEs (both support)
WITH ranked_sales AS (
  SELECT
    salesperson_id,
    SUM(amount) AS total,
    RANK() OVER (ORDER BY SUM(amount) DESC) AS sales_rank
  FROM sales
  GROUP BY salesperson_id
)
SELECT * FROM ranked_sales WHERE sales_rank <= 10;

-- Recursive CTEs (both support)
WITH RECURSIVE org_chart AS (
  SELECT id, name, manager_id, 0 AS depth          -- anchor: top-level managers
  FROM employees WHERE manager_id IS NULL
  UNION ALL
  SELECT e.id, e.name, e.manager_id, depth + 1     -- recursive step: direct reports
  FROM employees e
  JOIN org_chart o ON e.manager_id = o.id
)
SELECT * FROM org_chart ORDER BY depth;

-- PostgreSQL extras:
-- FILTER clause in aggregations (MySQL lacks it — use CASE inside the aggregate)
SELECT
  COUNT(*) FILTER (WHERE status = 'active') AS active_count,
  COUNT(*) FILTER (WHERE status = 'inactive') AS inactive_count
FROM users;

-- LATERAL joins: the subquery may reference columns of preceding FROM items,
-- so it runs once per user — "top N per group" in a single query
SELECT u.name, recent.order_id
FROM users u,
LATERAL (
  SELECT order_id FROM orders WHERE user_id = u.id
  ORDER BY created_at DESC LIMIT 3
) AS recent;

Indexes

-- Standard B-tree index (both)
CREATE INDEX idx_users_email ON users (email);

-- Partial index — PostgreSQL (index subset of rows)
CREATE INDEX idx_active_users ON users (email) WHERE active = true;
-- Only indexes active users — smaller, faster for queries with WHERE active = true

-- Composite index (column order matters: the leading column must appear in the filter)
CREATE INDEX idx_orders_user_date ON orders (user_id, created_at DESC);

-- GIN index — PostgreSQL (arrays, JSONB, full-text)
CREATE INDEX idx_tags ON posts USING GIN (tags);         -- array column
CREATE INDEX idx_meta ON posts USING GIN (metadata);     -- JSONB column

-- GiST index — PostgreSQL (geometric, range types, full-text)
CREATE INDEX idx_location ON places USING GIST (coordinates);  -- PostGIS

-- MySQL-specific
-- FULLTEXT index
CREATE FULLTEXT INDEX idx_search ON articles (title, body);

-- Invisible index (MySQL 8.0) — optimizer ignores it; test impact without dropping
ALTER TABLE users ALTER INDEX idx_email INVISIBLE;

-- Both: covering index (include extra columns to avoid table lookups)
-- PostgreSQL (11+): INCLUDE stores payload columns in leaf pages without widening the key
CREATE INDEX idx_orders_covering ON orders (user_id) INCLUDE (status, total);
-- MySQL: emulate by adding the extra columns to the key itself
CREATE INDEX idx_orders_covering ON orders (user_id, status, total);

Replication

PostgreSQL replication options:
  Streaming replication (built-in)
    - Primary → one or more standbys
    - Synchronous or asynchronous
    - Standbys can be used for read queries
  Logical replication (PostgreSQL 10+)
    - Table-level, row-level filtering
    - Replicate to different PostgreSQL versions
  Third-party: Patroni (HA), pgpool-II (connection pooling + HA)

MySQL replication options:
  Classic async replication (simple, widely used)
  Semi-synchronous replication (primary waits for at least one replica)
  Group Replication (multi-primary, built-in)
  InnoDB Cluster (MySQL Shell + Group Replication)
  ProxySQL (connection routing to primary/replicas)
-- PostgreSQL: check replication status (run on the primary)
-- pg_lsn values cannot be subtracted with "-" before PostgreSQL 14;
-- pg_wal_lsn_diff() works on all supported versions and returns the lag in bytes
SELECT
  client_addr,
  state,
  pg_wal_lsn_diff(sent_lsn, write_lsn)  AS write_lag_bytes,
  pg_wal_lsn_diff(sent_lsn, flush_lsn)  AS flush_lag_bytes,
  pg_wal_lsn_diff(sent_lsn, replay_lsn) AS replay_lag_bytes
FROM pg_stat_replication;

-- MySQL: check replication status (run on the replica;
-- MySQL 8.0.22+ syntax — older versions use SHOW SLAVE STATUS)
SHOW REPLICA STATUS\G
-- Shows: Seconds_Behind_Source, Replica_IO_Running, Replica_SQL_Running

Performance Tips

-- EXPLAIN ANALYZE (PostgreSQL)
-- ANALYZE actually executes the query; BUFFERS adds cache hit/read counts
EXPLAIN (ANALYZE, BUFFERS, FORMAT TEXT)
SELECT u.name, COUNT(o.id) AS order_count
FROM users u
LEFT JOIN orders o ON o.user_id = u.id
WHERE u.created_at > NOW() - INTERVAL '30 days'
GROUP BY u.id;

-- Key things to look for:
-- Seq Scan → missing index
-- Hash Join vs Nested Loop vs Merge Join
-- Actual rows vs Estimated rows (large diff = stale statistics)
-- Buffers: hit vs read (cache hit ratio)

-- Update statistics
ANALYZE users;  -- Update table statistics
VACUUM ANALYZE; -- Reclaim space + update statistics

-- EXPLAIN in MySQL (FORMAT=JSON includes cost estimates and used indexes)
EXPLAIN FORMAT=JSON
SELECT u.name, COUNT(o.id)
FROM users u
LEFT JOIN orders o ON o.user_id = u.id
WHERE u.created_at > DATE_SUB(NOW(), INTERVAL 30 DAY)
GROUP BY u.id;

-- Check index usage
SHOW INDEX FROM users;
SELECT * FROM information_schema.STATISTICS WHERE table_name = 'users';
-- Connection pooling (critical for both)
-- PostgreSQL: PgBouncer
-- MySQL: ProxySQL or MySQL Router

-- PostgreSQL: table bloat (MVCC creates dead tuples)
-- Regular VACUUM prevents bloat
ALTER TABLE orders SET (autovacuum_vacuum_scale_factor = 0.05);  -- More frequent vacuum for large tables

-- PostgreSQL: check table size (largest first)
SELECT
  relname AS table_name,
  pg_size_pretty(pg_total_relation_size(relid)) AS total_size,  -- table + indexes + TOAST
  pg_size_pretty(pg_relation_size(relid)) AS table_size,
  pg_size_pretty(pg_indexes_size(relid)) AS index_size
FROM pg_catalog.pg_statio_user_tables
ORDER BY pg_total_relation_size(relid) DESC;

Extensions (PostgreSQL Only)

PostgreSQL’s extension system is a significant differentiator.

-- List available extensions
SELECT name, default_version, comment FROM pg_available_extensions ORDER BY name;

-- PostGIS — geospatial data
CREATE EXTENSION postgis;
-- geography values compute distances in meters on a spheroid; divide for km
SELECT ST_Distance(
  ST_GeographyFromText('POINT(-122.4194 37.7749)'),  -- San Francisco
  ST_GeographyFromText('POINT(-118.2437 34.0522)')   -- Los Angeles
) / 1000 AS distance_km;

-- pg_trgm — trigram similarity (fuzzy search)
CREATE EXTENSION pg_trgm;
CREATE INDEX idx_name_trgm ON users USING GIN (name gin_trgm_ops);
SELECT name FROM users WHERE name % 'Alce';  -- typo-tolerant search (% = "similar to")
SELECT name, similarity(name, 'Alice') AS sim FROM users ORDER BY sim DESC LIMIT 10;

-- uuid-ossp — UUID generation
-- (PostgreSQL 13+ has built-in gen_random_uuid(); the extension is only
--  needed for older versions or other UUID variants)
CREATE EXTENSION "uuid-ossp";
INSERT INTO items (id, name) VALUES (uuid_generate_v4(), 'Widget');

-- pg_stat_statements — query performance tracking
-- (requires shared_preload_libraries = 'pg_stat_statements' and a server restart)
CREATE EXTENSION pg_stat_statements;
SELECT query, calls, mean_exec_time, total_exec_time
FROM pg_stat_statements
ORDER BY total_exec_time DESC LIMIT 10;

-- timescaledb — time-series data
CREATE EXTENSION timescaledb;
SELECT create_hypertable('metrics', 'time');

Node.js Integration

// PostgreSQL with node-postgres (pg)
import { Pool } from 'pg';

const pool = new Pool({
  connectionString: process.env.DATABASE_URL,
  max: 20,                   // max concurrent connections held by the pool
  idleTimeoutMillis: 30000,  // close clients idle for 30s
});

const result = await pool.query(
  'SELECT * FROM users WHERE email = $1',  // Parameterized query (prevents SQL injection)
  ['alice@example.com']
);

// MySQL with mysql2
// NOTE: both snippets declare `const pool` — keep them in separate modules
import mysql from 'mysql2/promise';

const pool = mysql.createPool({
  uri: process.env.DATABASE_URL,
  waitForConnections: true,  // queue requests instead of erroring when the pool is busy
  connectionLimit: 20,
});

const [rows] = await pool.execute(
  'SELECT * FROM users WHERE email = ?',   // ? for parameters
  ['alice@example.com']
);
// Both work with ORMs — Prisma supports both
// prisma/schema.prisma
datasource db {
  provider = "postgresql"  // or "mysql"
  url      = env("DATABASE_URL")
}

Related posts: