PostgreSQL INSERT SELECT完全ガイド – データを賢くコピー・移行する方法

データベース・SQL

「別テーブルからデータをコピーしたい」

「条件に合うデータだけを新しいテーブルに移したい」

「複数のテーブルを結合して、結果を保存したい」

「大量データを効率的に別テーブルに投入したい」

そんな要望を一発で解決するのがINSERT SELECT文です!

INSERT SELECTを使えば、SELECT文の結果を直接別のテーブルに挿入できるため、データ移行やバックアップ、集計結果の保存などが驚くほど簡単になります。

この記事では、INSERT SELECTの基本から高度な活用法まで、実践的な例とともに分かりやすく解説していきます!

スポンサーリンク

INSERT SELECTの基本構文

基本的な書き方

INSERT INTO 挿入先テーブル (カラム1, カラム2, カラム3, ...)
SELECT カラム1, カラム2, カラム3, ...
FROM 取得元テーブル
WHERE 条件;

シンプルな例で理解する

-- 全データをコピー
INSERT INTO products_backup
SELECT * FROM products;

-- 特定のカラムだけコピー
INSERT INTO customer_names (id, full_name, email)
SELECT customer_id, name, email_address
FROM customers;

-- 条件付きでコピー
INSERT INTO active_users (user_id, username, last_login)
SELECT id, username, last_login_date
FROM users
WHERE status = 'active' 
  AND last_login_date > CURRENT_DATE - INTERVAL '30 days';

実践!よく使うINSERT SELECTパターン

パターン1:テーブルのバックアップ作成

-- バックアップテーブルを作成
CREATE TABLE orders_backup_20240101 AS TABLE orders WITH NO DATA;

-- データをコピー
INSERT INTO orders_backup_20240101
SELECT * FROM orders
WHERE order_date >= '2024-01-01' 
  AND order_date < '2024-02-01';

-- タイムスタンプ付きバックアップ
CREATE TABLE products_archive (
    archived_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    product_id INTEGER,
    product_name VARCHAR(200),
    price DECIMAL(10,2),
    status VARCHAR(50)
);

INSERT INTO products_archive (product_id, product_name, price, status)
SELECT id, name, price, status
FROM products
WHERE status = 'discontinued';

パターン2:データの集計と保存

-- 月次売上サマリーテーブル
CREATE TABLE monthly_sales_summary (
    year INTEGER,
    month INTEGER,
    category VARCHAR(100),
    total_sales DECIMAL(12,2),
    total_quantity INTEGER,
    average_price DECIMAL(10,2),
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- 集計結果を挿入
INSERT INTO monthly_sales_summary (year, month, category, total_sales, total_quantity, average_price)
SELECT 
    EXTRACT(YEAR FROM sale_date) as year,
    EXTRACT(MONTH FROM sale_date) as month,
    p.category,
    SUM(s.amount) as total_sales,
    SUM(s.quantity) as total_quantity,
    AVG(s.amount / NULLIF(s.quantity, 0)) as average_price
FROM sales s
JOIN products p ON s.product_id = p.id
WHERE sale_date >= '2024-01-01' AND sale_date < '2024-02-01'
GROUP BY 
    EXTRACT(YEAR FROM sale_date),
    EXTRACT(MONTH FROM sale_date),
    p.category;

-- 日次のアクティブユーザー数を記録
INSERT INTO daily_active_users (date, user_count, new_users, returning_users)
SELECT 
    CURRENT_DATE as date,
    COUNT(DISTINCT user_id) as user_count,
    COUNT(DISTINCT CASE WHEN created_date = CURRENT_DATE THEN user_id END) as new_users,
    COUNT(DISTINCT CASE WHEN created_date < CURRENT_DATE THEN user_id END) as returning_users
FROM user_activities
WHERE activity_date = CURRENT_DATE;

パターン3:データの変換と移行

-- 正規化されていないテーブルから正規化
-- 元テーブル:顧客と注文が1つのテーブルに
CREATE TABLE denormalized_orders (
    order_id INTEGER,
    customer_name VARCHAR(100),
    customer_email VARCHAR(200),
    customer_phone VARCHAR(20),
    product_name VARCHAR(100),
    quantity INTEGER,
    price DECIMAL(10,2)
);

-- 顧客マスタへの移行(重複除去)
INSERT INTO customers (name, email, phone)
SELECT DISTINCT 
    customer_name,
    customer_email,
    customer_phone
FROM denormalized_orders
WHERE NOT EXISTS (
    SELECT 1 FROM customers c
    WHERE c.email = denormalized_orders.customer_email
);

-- データ型の変換を含む移行
INSERT INTO products_new (
    product_code,
    product_name,
    price,
    tax_rate,
    price_with_tax
)
SELECT 
    UPPER(TRIM(product_code)),  -- 大文字に統一
    INITCAP(product_name),       -- 最初を大文字に
    CAST(price_string AS DECIMAL(10,2)),  -- 文字列から数値へ
    0.10,  -- デフォルト税率
    CAST(price_string AS DECIMAL(10,2)) * 1.10  -- 税込価格を計算
FROM products_old
WHERE price_string ~ '^[0-9]+\.?[0-9]*$';  -- 数値形式のみ

高度なINSERT SELECTテクニック

複数テーブルの結合(JOIN)

-- 複数テーブルから情報を統合
INSERT INTO customer_orders_summary (
    customer_id,
    customer_name,
    total_orders,
    total_amount,
    first_order_date,
    last_order_date,
    preferred_category
)
SELECT 
    c.id,
    c.name,
    COUNT(DISTINCT o.id) as total_orders,
    SUM(o.total_amount) as total_amount,
    MIN(o.order_date) as first_order_date,
    MAX(o.order_date) as last_order_date,
    (
        SELECT p.category
        FROM orders o2
        JOIN order_items oi ON o2.id = oi.order_id
        JOIN products p ON oi.product_id = p.id
        WHERE o2.customer_id = c.id
        GROUP BY p.category
        ORDER BY SUM(oi.quantity * oi.price) DESC
        LIMIT 1
    ) as preferred_category
FROM customers c
LEFT JOIN orders o ON c.id = o.customer_id
GROUP BY c.id, c.name
HAVING COUNT(o.id) > 0;

UNION/UNION ALLを使った統合

-- 複数のソースからデータを統合
INSERT INTO all_transactions (
    transaction_date,
    transaction_type,
    amount,
    description,
    source_system
)
SELECT 
    payment_date as transaction_date,
    'PAYMENT' as transaction_type,
    amount,
    'Payment: ' || payment_method as description,
    'PAYMENT_SYSTEM' as source_system
FROM payments
WHERE payment_date >= '2024-01-01'

UNION ALL

SELECT 
    refund_date,
    'REFUND',
    -amount,  -- マイナスで記録
    'Refund: ' || reason,
    'REFUND_SYSTEM'
FROM refunds
WHERE refund_date >= '2024-01-01'

UNION ALL

SELECT 
    adjustment_date,
    'ADJUSTMENT',
    adjustment_amount,
    'Manual adjustment: ' || notes,
    'MANUAL'
FROM manual_adjustments
WHERE adjustment_date >= '2024-01-01';

WITH句(CTE)を使った複雑な処理

-- 複雑な集計を段階的に処理
WITH sales_by_category AS (
    SELECT 
        category_id,
        SUM(amount) as total_sales,
        COUNT(*) as transaction_count
    FROM sales
    WHERE sale_date >= CURRENT_DATE - INTERVAL '90 days'
    GROUP BY category_id
),
category_rankings AS (
    SELECT 
        category_id,
        total_sales,
        transaction_count,
        RANK() OVER (ORDER BY total_sales DESC) as sales_rank,
        PERCENT_RANK() OVER (ORDER BY total_sales) as percentile
    FROM sales_by_category
)
INSERT INTO category_performance (
    category_id,
    category_name,
    performance_tier,
    total_sales,
    transaction_count,
    sales_rank,
    evaluation_date
)
SELECT 
    cr.category_id,
    c.name,
    CASE 
        WHEN cr.percentile >= 0.8 THEN 'TOP'
        WHEN cr.percentile >= 0.5 THEN 'MIDDLE'
        ELSE 'LOW'
    END as performance_tier,
    cr.total_sales,
    cr.transaction_count,
    cr.sales_rank,
    CURRENT_DATE
FROM category_rankings cr
JOIN categories c ON cr.category_id = c.id;

ON CONFLICTで重複を賢く処理

UPSERT操作(INSERT or UPDATE)

-- 重複時は更新する
INSERT INTO product_inventory (
    product_id,
    warehouse_id,
    quantity,
    last_updated
)
SELECT 
    product_id,
    warehouse_id,
    SUM(quantity) as quantity,
    CURRENT_TIMESTAMP
FROM incoming_shipments
WHERE processed = false
GROUP BY product_id, warehouse_id
ON CONFLICT (product_id, warehouse_id) 
DO UPDATE SET 
    quantity = product_inventory.quantity + EXCLUDED.quantity,
    last_updated = EXCLUDED.last_updated;

-- 重複時は何もしない
INSERT INTO email_list (email, name, subscribe_date)
SELECT DISTINCT 
    email,
    name,
    CURRENT_DATE
FROM event_registrations
WHERE event_id = 123
ON CONFLICT (email) DO NOTHING;

-- 条件付き更新
INSERT INTO user_scores (user_id, game_id, high_score, play_count)
SELECT 
    user_id,
    game_id,
    MAX(score) as high_score,
    COUNT(*) as play_count
FROM game_sessions
WHERE session_date = CURRENT_DATE
GROUP BY user_id, game_id
ON CONFLICT (user_id, game_id) 
DO UPDATE SET 
    high_score = GREATEST(user_scores.high_score, EXCLUDED.high_score),
    play_count = user_scores.play_count + EXCLUDED.play_count
WHERE EXCLUDED.high_score > user_scores.high_score;  -- スコアが上回った時のみ更新

パフォーマンス最適化

大量データの効率的な挿入

-- バッチ処理で分割挿入
DO $$
DECLARE
    batch_size INTEGER := 10000;
    offset_val INTEGER := 0;
    total_rows INTEGER;
BEGIN
    SELECT COUNT(*) INTO total_rows FROM large_source_table;
    
    WHILE offset_val < total_rows LOOP
        INSERT INTO destination_table
        SELECT * FROM large_source_table
        ORDER BY id
        LIMIT batch_size
        OFFSET offset_val;
        
        offset_val := offset_val + batch_size;
        
        -- 進捗をログ出力
        RAISE NOTICE 'Processed % of % rows', offset_val, total_rows;
        
        -- 必要に応じてVACUUM
        IF offset_val % 50000 = 0 THEN
            VACUUM ANALYZE destination_table;
        END IF;
    END LOOP;
END $$;

-- インデックスを一時的に無効化
ALTER TABLE destination_table DISABLE TRIGGER ALL;
BEGIN;

INSERT INTO destination_table
SELECT * FROM source_table
WHERE created_at >= '2024-01-01';

COMMIT;
ALTER TABLE destination_table ENABLE TRIGGER ALL;

-- 並列処理を活用
SET max_parallel_workers_per_gather = 4;
SET parallel_setup_cost = 0;
SET parallel_tuple_cost = 0;

INSERT INTO parallel_destination
SELECT /*+ PARALLEL(4) */ *
FROM huge_source_table
WHERE conditions;

メモリ設定の最適化

-- セッション単位で調整
SET work_mem = '256MB';
SET maintenance_work_mem = '1GB';

-- 大量のINSERT SELECT実行
INSERT INTO analytics_results
SELECT 
    -- 複雑な集計処理
FROM large_table
GROUP BY multiple_columns;

-- 設定を元に戻す
RESET work_mem;
RESET maintenance_work_mem;

エラー処理とデバッグ

よくあるエラーと対処法

-- カラム数の不一致エラー
-- エラー: INSERT has more target columns than expressions

-- 悪い例
INSERT INTO users (id, name, email)
SELECT id, name FROM old_users;  -- emailが不足

-- 良い例
INSERT INTO users (id, name, email)
SELECT id, name, email FROM old_users;

-- またはデフォルト値を指定
INSERT INTO users (id, name, email)
SELECT id, name, 'no-email@example.com' FROM old_users;

-- データ型の不一致
-- エラー: column "price" is of type numeric but expression is of type text

-- 型変換を明示的に行う
INSERT INTO products (name, price)
SELECT 
    name,
    CAST(price_text AS NUMERIC(10,2))
FROM import_data
WHERE price_text ~ '^[0-9]+\.?[0-9]*$';  -- 数値形式のみ

-- NULL制約違反
-- エラー: null value in column "required_field" violates not-null constraint

-- NULL値を除外またはデフォルト値を設定
INSERT INTO orders (customer_id, order_date, status)
SELECT 
    customer_id,
    COALESCE(order_date, CURRENT_DATE),
    COALESCE(status, 'pending')
FROM temp_orders
WHERE customer_id IS NOT NULL;

トランザクション管理

-- 安全なデータ移行
BEGIN;

-- 移行前の件数確認
SELECT COUNT(*) as before_count FROM destination_table;

-- データ挿入
INSERT INTO destination_table
SELECT * FROM source_table
WHERE migration_flag = false;

-- 移行後の確認
SELECT COUNT(*) as after_count FROM destination_table;

-- データ検証
SELECT 
    'Source' as table_name, COUNT(*) as row_count
FROM source_table
WHERE migration_flag = false
UNION ALL
SELECT 
    'Destination' as table_name, COUNT(*) as row_count
FROM destination_table
WHERE created_at >= CURRENT_TIMESTAMP - INTERVAL '1 minute';

-- 問題がなければコミット
COMMIT;
-- 問題があればロールバック
-- ROLLBACK;

実践的な応用例

ログテーブルのアーカイブ

-- 古いログを月別テーブルにアーカイブ
CREATE OR REPLACE FUNCTION archive_logs()
RETURNS void AS $$
DECLARE
    archive_date DATE;
    table_name TEXT;
    row_count INTEGER;
BEGIN
    -- 3ヶ月以上前のデータをアーカイブ
    archive_date := DATE_TRUNC('month', CURRENT_DATE - INTERVAL '3 months');
    table_name := 'logs_' || TO_CHAR(archive_date, 'YYYY_MM');
    
    -- アーカイブテーブル作成
    EXECUTE format('
        CREATE TABLE IF NOT EXISTS %I (LIKE logs INCLUDING ALL)',
        table_name
    );
    
    -- データ移行
    EXECUTE format('
        INSERT INTO %I
        SELECT * FROM logs
        WHERE created_at >= %L 
          AND created_at < %L',
        table_name,
        archive_date,
        archive_date + INTERVAL '1 month'
    );
    
    GET DIAGNOSTICS row_count = ROW_COUNT;
    RAISE NOTICE 'Archived % rows to %', row_count, table_name;
    
    -- 元テーブルから削除
    DELETE FROM logs
    WHERE created_at >= archive_date 
      AND created_at < archive_date + INTERVAL '1 month';
END;
$$ LANGUAGE plpgsql;

データマート作成

-- 分析用の非正規化テーブル作成
INSERT INTO customer_360_view
WITH customer_orders AS (
    SELECT 
        customer_id,
        COUNT(*) as order_count,
        SUM(total_amount) as lifetime_value,
        MAX(order_date) as last_order_date
    FROM orders
    GROUP BY customer_id
),
customer_products AS (
    SELECT 
        o.customer_id,
        ARRAY_AGG(DISTINCT p.category) as purchased_categories,
        COUNT(DISTINCT p.id) as unique_products
    FROM orders o
    JOIN order_items oi ON o.id = oi.order_id
    JOIN products p ON oi.product_id = p.id
    GROUP BY o.customer_id
),
customer_engagement AS (
    SELECT 
        user_id as customer_id,
        COUNT(*) as login_count,
        MAX(login_date) as last_login
    FROM user_logins
    GROUP BY user_id
)
SELECT 
    c.id as customer_id,
    c.name,
    c.email,
    c.created_at as registration_date,
    COALESCE(co.order_count, 0) as total_orders,
    COALESCE(co.lifetime_value, 0) as lifetime_value,
    co.last_order_date,
    COALESCE(cp.purchased_categories, ARRAY[]::VARCHAR[]) as categories,
    COALESCE(cp.unique_products, 0) as product_variety,
    COALESCE(ce.login_count, 0) as engagement_score,
    ce.last_login,
    CASE 
        WHEN co.lifetime_value > 10000 THEN 'VIP'
        WHEN co.lifetime_value > 5000 THEN 'Gold'
        WHEN co.lifetime_value > 1000 THEN 'Silver'
        ELSE 'Bronze'
    END as customer_tier,
    CURRENT_TIMESTAMP as last_updated
FROM customers c
LEFT JOIN customer_orders co ON c.id = co.customer_id
LEFT JOIN customer_products cp ON c.id = cp.customer_id
LEFT JOIN customer_engagement ce ON c.id = ce.customer_id;

RETURNING句で挿入したデータを取得

-- 挿入したデータのIDを取得
WITH inserted AS (
    INSERT INTO orders (customer_id, order_date, status)
    SELECT 
        customer_id,
        CURRENT_TIMESTAMP,
        'pending'
    FROM shopping_carts
    WHERE session_id = 'abc123'
    RETURNING id, customer_id
)
INSERT INTO order_items (order_id, product_id, quantity, price)
SELECT 
    i.id,
    sc.product_id,
    sc.quantity,
    p.price
FROM inserted i
JOIN shopping_carts sc ON i.customer_id = sc.customer_id
JOIN products p ON sc.product_id = p.id
WHERE sc.session_id = 'abc123';

-- 挿入結果をログに記録
INSERT INTO audit_log (table_name, operation, record_count, details)
SELECT 
    'products',
    'INSERT',
    COUNT(*),
    JSON_BUILD_OBJECT(
        'source', 'product_import',
        'imported_at', CURRENT_TIMESTAMP,
        'ids', ARRAY_AGG(id)
    )
FROM (
    INSERT INTO products (name, price, category)
    SELECT name, price, category
    FROM product_import
    WHERE processed = false
    RETURNING id
) AS inserted_products;

まとめ – INSERT SELECTで効率的なデータ操作を実現!

PostgreSQLのINSERT SELECT文について、基本から応用まで詳しく解説してきました。

重要ポイント:

  • SELECT結果を直接INSERTできる強力な機能
  • JOINやCTEと組み合わせて複雑な処理も可能
  • ON CONFLICTでUPSERT操作も実現
  • 大量データは分割処理でパフォーマンス向上
  • RETURNING句で挿入データの取得も可能

使いどころ:

  • テーブルのバックアップ・アーカイブ
  • データの集計と保存
  • ETL処理でのデータ変換
  • 正規化・非正規化の実行
  • ログデータの整理

INSERT SELECTをマスターすれば、データ移行や集計処理が驚くほど簡単になります。

ぜひ実際のプロジェクトで活用してみてください!

コメント

タイトルとURLをコピーしました