Section 5: JSON Functions — Working with JSON data inside MySQL

Setup: Creating the user_profiles Table

All JSON examples use this table. Run this first to follow along.

-- Create a table with a JSON column for flexible user metadata
CREATE TABLE user_profiles (
    id       INT PRIMARY KEY AUTO_INCREMENT,
    username VARCHAR(100),
    metadata JSON
);

-- Insert sample records with realistic profile JSON data
INSERT INTO user_profiles (username, metadata) VALUES
('maria_s', '{"theme": "dark", "tags": ["Premium", "Early Adopter"],
  "address": {"street": "123 Main St", "city": "Austin", "zip": "73301"},
  "social": {"provider": "Google", "plan": "Pro", "id": "GL-90021"}}'),

('james_r', '{"theme": "light", "tags": ["Enterprise"],
  "address": {"street": "456 Oak Ave", "city": "Denver", "zip": "80201"},
  "social": {"provider": "GitHub", "plan": "Team", "id": "GH-44010"}}'),

('liza_c', '{"theme": "auto", "tags": [],
  "address": {"street": "789 Pine Rd", "city": "Seattle", "zip": "98101"},
  "social": {"provider": "Google", "plan": "Free", "id": "GL-77055"}}');
1

JSON_OBJECT('key', value, ...)

Builds a JSON object on the fly from pairs of keys and values. Think of it like creating a mini data card right inside your query. You pass the key name, then its value, then the next key, and so on.

SELECT
    JSON_OBJECT(
        'user',  username,
        'theme', metadata->>'$.theme'
    ) AS user_card
FROM user_profiles
LIMIT 2;
Output
user_card
{"user": "maria_s", "theme": "dark"}
{"user": "james_r", "theme": "light"}
2

JSON_ARRAY(val1, val2, ...)

Creates a JSON array (a list) from the values you give it. It is like putting items into a numbered list. Each value becomes one element in the array.

SELECT
    JSON_ARRAY('Laptop', 'Keyboard', 'Monitor') AS product_list,
    JSON_ARRAY(29.99, 49.99, 199.99)             AS prices;
Output
product_listprices
["Laptop", "Keyboard", "Monitor"][29.99, 49.99, 199.99]
3

JSON_EXTRACT(col, '$.key')

Pulls a specific value out of a JSON column using a path. The $ means "the root of the JSON," and then you drill into it with dot notation. The result keeps its JSON formatting (strings come back with quotes around them).

SELECT
    username,
    JSON_EXTRACT(metadata, '$.theme')           AS theme,
    JSON_EXTRACT(metadata, '$.tags')            AS tags,
    JSON_EXTRACT(metadata, '$.social.provider') AS provider
FROM user_profiles;
Output
usernamethemetagsprovider
maria_s"dark"["Premium", "Early Adopter"]"Google"
james_r"light"["Enterprise"]"GitHub"
liza_c"auto"[]"Google"
4

col->'$.key' — Shorthand for JSON_EXTRACT

The arrow operator -> is just a shorter way to write JSON_EXTRACT. It does exactly the same thing but saves typing. The result still has JSON quotes around string values.

SELECT
    username,
    metadata->'$.theme'            AS theme,
    metadata->'$.address.city'    AS city
FROM user_profiles;
Output
usernamethemecity
maria_s"dark""Austin"
james_r"light""Denver"
liza_c"auto""Seattle"
5

JSON_UNQUOTE(JSON_EXTRACT(...))

JSON_EXTRACT returns strings with double quotes around them (like "dark"). JSON_UNQUOTE strips those quotes off so you get a plain string (like dark). You wrap the extract call inside the unquote call.

SELECT
    username,
    JSON_EXTRACT(metadata, '$.theme')                     AS with_quotes,
    JSON_UNQUOTE(JSON_EXTRACT(metadata, '$.theme'))     AS without_quotes
FROM user_profiles;
Output
usernamewith_quoteswithout_quotes
maria_s"dark"dark
james_r"light"light
liza_c"auto"auto
6

col->>'$.key' — Shorthand for Unquoted Extract

The double-arrow ->> combines JSON_EXTRACT and JSON_UNQUOTE into one shortcut. This is what you will use most of the time when you want a clean, plain-text value from a JSON column.

SELECT
    username,
    metadata->>'$.theme'            AS theme,
    metadata->>'$.social.provider'  AS provider,
    metadata->>'$.address.zip'      AS zip_code
FROM user_profiles;
Output
usernamethemeproviderzip_code
maria_sdarkGoogle73301
james_rlightGitHub80201
liza_cautoGoogle98101
7

JSON_SET(col, '$.key', value)

Updates an existing key or creates a new one. If the key already exists, its value gets replaced. If the key does not exist, it gets added. Think of it as "set this no matter what."

-- Add a new key 'locale' and update existing 'theme'
SELECT
    username,
    JSON_SET(
        metadata,
        '$.locale', 'en-US',
        '$.theme',  'solarized'
    ) AS updated_metadata
FROM user_profiles
WHERE id = 1;
Output (formatted for clarity)
usernameupdated_metadata (key changes highlighted)
maria_s{"theme": "solarized", "tags": ["Premium", "Early Adopter"], "address": {...}, "social": {...}, "locale": "en-US"}
8

JSON_INSERT(col, '$.key', value)

Only adds a key if it does not already exist. If the key is already there, it does nothing. This is the safe way to add new fields without accidentally overwriting data.

-- 'theme' already exists, so it will NOT change
-- 'referral_source' is new, so it WILL be added
SELECT
    username,
    metadata->>'$.theme' AS original_theme,
    JSON_INSERT(
        metadata,
        '$.theme',           'X-CHANGED',
        '$.referral_source',  'Newsletter'
    )->>'$.theme' AS still_same_theme,
    JSON_INSERT(
        metadata,
        '$.referral_source',  'Newsletter'
    )->>'$.referral_source' AS new_referral
FROM user_profiles
WHERE id = 1;
Output
usernameoriginal_themestill_same_themenew_referral
maria_sdarkdarkNewsletter
9

JSON_REPLACE(col, '$.key', value)

Only changes a key that already exists. If the key is not there, nothing happens. This is the opposite of JSON_INSERT. Use it when you want to update but never accidentally create new keys.

-- 'theme' exists => will change.  'locale' doesn't exist => ignored.
SELECT
    username,
    JSON_REPLACE(
        metadata,
        '$.theme',  'high-contrast',
        '$.locale', 'fr-FR'
    )->>'$.theme' AS replaced_theme,
    JSON_REPLACE(
        metadata,
        '$.locale', 'fr-FR'
    )->'$.locale' AS locale_result
FROM user_profiles
WHERE id = 2;
Output
usernamereplaced_themelocale_result
james_rhigh-contrastNULL
10

JSON_REMOVE(col, '$.key')

Deletes a key (and its value) from a JSON document. The key simply disappears. If the key does not exist, nothing happens.

-- Remove the social info from the JSON
SELECT
    username,
    JSON_KEYS(metadata)                              AS before_keys,
    JSON_KEYS(JSON_REMOVE(metadata, '$.social'))     AS after_keys
FROM user_profiles
WHERE id = 1;
Output
usernamebefore_keysafter_keys
maria_s["address", "social", "tags", "theme"]["address", "tags", "theme"]
11

JSON_CONTAINS(col, value)

Checks whether a JSON document contains a specific value. Returns 1 (true) or 0 (false). You can also give it a path to search within a specific part of the JSON. Great for asking questions like "does this user have Premium in their tags list?"

SELECT
    username,
    JSON_CONTAINS(metadata->'$.tags', '"Premium"')       AS has_premium,
    JSON_CONTAINS(metadata->'$.tags', '"Enterprise"')    AS has_enterprise
FROM user_profiles;
Output
usernamehas_premiumhas_enterprise
maria_s10
james_r01
liza_c00
13

JSON_MERGE_PATCH(json1, json2)

Merges two JSON objects together. If both have the same key, the second one wins (it replaces the first). This follows the RFC 7396 standard. Think of it as "layer the second on top of the first."

SELECT JSON_MERGE_PATCH(
    '{"item": "Laptop", "status": "pending", "priority": "normal"}',
    '{"status": "shipped", "tracking": "UPS-98765"}'
) AS merged;
Output
merged
{"item": "Laptop", "status": "shipped", "priority": "normal", "tracking": "UPS-98765"}
14

JSON_MERGE_PRESERVE(json1, json2)

Merges two JSON documents, but when the same key exists in both, it keeps both values by creating an array. It preserves everything instead of overwriting. Arrays get concatenated together.

SELECT JSON_MERGE_PRESERVE(
    '{"tags": ["Premium"], "category": "Electronics"}',
    '{"tags": ["Wholesale"],  "category": "Office Supplies"}'
) AS merged;
Output
merged
{"tags": ["Premium", "Wholesale"], "category": ["Electronics", "Office Supplies"]}
15

JSON_KEYS(col)

Returns an array of all the top-level key names in a JSON object. It is like asking "what fields does this JSON have?" You can also pass a path to get keys of a nested object.

SELECT
    username,
    JSON_KEYS(metadata)                       AS top_keys,
    JSON_KEYS(metadata, '$.social')           AS social_keys
FROM user_profiles
WHERE id = 1;
Output
usernametop_keyssocial_keys
maria_s["address", "social", "tags", "theme"]["id", "plan", "provider"]
16

JSON_LENGTH(col)

Counts the number of elements. For an object, it counts the keys. For an array, it counts the items. You can also pass a path to count inside a nested part of the document.

SELECT
    username,
    JSON_LENGTH(metadata)                   AS total_keys,
    JSON_LENGTH(metadata, '$.tags')        AS tag_count
FROM user_profiles;
Output
usernametotal_keystag_count
maria_s42
james_r41
liza_c40
17

JSON_TYPE(col)

Tells you what type of JSON value you are looking at: OBJECT, ARRAY, STRING, INTEGER, DOUBLE, BOOLEAN, or NULL. Useful for debugging when you are not sure what is stored.

SELECT
    JSON_TYPE(metadata)                            AS root_type,
    JSON_TYPE(metadata->'$.theme')               AS theme_type,
    JSON_TYPE(metadata->'$.tags')                AS tags_type,
    JSON_TYPE('42')                                 AS num_type,
    JSON_TYPE('true')                               AS bool_type
FROM user_profiles
WHERE id = 1;
Output
root_typetheme_typetags_typenum_typebool_type
OBJECTSTRINGARRAYINTEGERBOOLEAN
18

JSON_VALID(string)

Checks whether a string is valid JSON. Returns 1 if it is valid, 0 if it is not. Use this to validate data before trying to store it in a JSON column. Bad JSON will cause errors otherwise.

SELECT
    JSON_VALID('{"theme": "dark"}')              AS valid_object,
    JSON_VALID('["Laptop", "Keyboard"]')          AS valid_array,
    JSON_VALID('not json at all')               AS invalid_string,
    JSON_VALID('{"broken": }')                  AS broken_json,
    JSON_VALID(NULL)                              AS null_input;
Output
valid_objectvalid_arrayinvalid_stringbroken_jsonnull_input
1100NULL
19

JSON_TABLE(col, path COLUMNS(...)) — Convert JSON to Rows

This is one of the most powerful JSON functions. It takes a JSON array and "unrolls" it into regular database rows and columns. Imagine you have a list of tags stored as a JSON array inside one column. JSON_TABLE lets you turn each tag into its own row, so you can query them like a normal table. You use it in the FROM clause with a COLUMNS definition that maps JSON paths to column names and types.

-- Turn the tags JSON array into individual rows
SELECT
    up.username,
    t.row_num,
    t.tag_name
FROM user_profiles up,
     JSON_TABLE(
         up.metadata->'$.tags',
         '$[*]' COLUMNS (
             row_num  FOR ORDINALITY,
             tag_name VARCHAR(100) PATH '$'
         )
     ) AS t
WHERE JSON_LENGTH(up.metadata->'$.tags') > 0;
Output
usernamerow_numtag_name
maria_s1Premium
maria_s2Early Adopter
james_r1Enterprise

Here is a more advanced example that extracts nested object fields into columns:

-- Extract the nested social object into proper columns
SELECT
    up.username,
    s.provider,
    s.plan_name,
    s.account_id
FROM user_profiles up,
     JSON_TABLE(
         up.metadata,
         '$' COLUMNS (
             provider   VARCHAR(50)  PATH '$.social.provider',
             plan_name  VARCHAR(50)  PATH '$.social.plan',
             account_id VARCHAR(50)  PATH '$.social.id'
         )
     ) AS s;
Output
usernameproviderplan_nameaccount_id
maria_sGoogleProGL-90021
james_rGitHubTeamGH-44010
liza_cGoogleFreeGL-77055
20

JSON_PRETTY(col)

Formats a JSON value with nice indentation and line breaks so humans can read it easily. This does not change the data; it just makes it look nicer when you display it.

SELECT JSON_PRETTY(metadata) AS formatted
FROM user_profiles
WHERE id = 3;
Output
formatted
{
  "theme": "auto",
  "tags": [],
  "address": {
    "street": "789 Pine Rd",
    "city": "Seattle",
    "zip": "98101"
  },
  "social": {
    "provider": "Google",
    "plan": "Free",
    "id": "GL-77055"
  }
}
Section 6: String Functions — Manipulating text data

Context: Business Data for String Examples

These examples use inline values and references to typical business text like customer names, product SKUs, order codes, and shipping notes.

1

CONCAT(str1, str2, ...)

Joins two or more strings together end-to-end. If any argument is NULL, the whole result becomes NULL. Use this to build full names, formatted IDs, or combine any text values.

SELECT
    CONCAT('Ms. ', 'Elena', ' ', 'Mendoza')     AS full_name,
    CONCAT('PRD-', '2024', '-', '00587')       AS product_sku,
    CONCAT('Result: ', NULL)                    AS null_trap;
Output
full_nameproduct_skunull_trap
Ms. Elena MendozaPRD-2024-00587NULL
2

CONCAT_WS(separator, str1, str2, ...)

Stands for "Concat With Separator." It joins strings with a separator between each one. The big advantage over CONCAT: it skips NULL values instead of making everything NULL. Perfect for building addresses or comma-separated lists.

SELECT
    CONCAT_WS(' | ', 'Laptop', 'Keyboard', 'Monitor')   AS product_list,
    CONCAT_WS(', ', 'Suite 401', NULL, 'Tower A')       AS location;
Output
product_listlocation
Laptop | Keyboard | MonitorSuite 401, Tower A
3

SUBSTRING(str, pos, len)

Extracts a piece of a string starting at position pos for len characters. Positions start at 1 (not 0). If you leave off len, it goes to the end.

SELECT
    SUBSTRING('PRD-2024-00587', 5, 4)   AS year_part,
    SUBSTRING('PRD-2024-00587', 10)      AS sequence_num,
    SUBSTRING('Wireless Mouse', 1, 4)    AS abbrev;
Output
year_partsequence_numabbrev
202400587Wire
4

LEFT(str, len)

Grabs the first len characters from the left side of a string. A quicker way to write SUBSTRING(str, 1, len).

SELECT
    LEFT('Wireless Bluetooth Mouse', 8)  AS short_name,
    LEFT('GL-90021', 2)                   AS provider_code;
Output
short_nameprovider_code
WirelessGL
6

LENGTH(str)

Returns the length of a string in bytes. For regular English text, one character equals one byte. But for special characters or emojis, a single character can be multiple bytes. If you want to count characters, use CHAR_LENGTH instead.

SELECT
    LENGTH('SKU')                 AS len_sku,
    LENGTH('Wireless Mouse')     AS len_product,
    LENGTH('')                    AS len_empty;
Output
len_skulen_productlen_empty
3140
7

CHAR_LENGTH(str)

Returns the number of characters in a string (not bytes). This is the one you should usually use when counting how many characters something has, because it handles multi-byte characters correctly.

SELECT
    CHAR_LENGTH('Ms. Cruz')               AS char_count,
    LENGTH('Ms. Cruz')                    AS byte_count,
    CHAR_LENGTH('Order Invoice #4021')     AS invoice_len;
Output
char_countbyte_countinvoice_len
8819
8

UPPER(str)

Converts every letter in the string to uppercase. Numbers, spaces, and symbols stay the same. Useful for standardizing codes or making labels stand out.

SELECT
    UPPER('wireless bluetooth mouse')  AS product_upper,
    UPPER('shipment-a3')                AS code_upper;
Output
product_uppercode_upper
WIRELESS BLUETOOTH MOUSESHIPMENT-A3
9

LOWER(str)

Converts every letter to lowercase. The opposite of UPPER. Good for normalizing user input before comparing it.

SELECT
    LOWER('URGENT - EXPRESS SHIPPING')  AS lowered,
    LOWER('Ms. GARCIA')                AS name_lower;
Output
loweredname_lower
urgent - express shippingms. garcia
10

TRIM(str)

Removes spaces (or other specified characters) from both the beginning and end of a string. Does not touch spaces in the middle. Essential for cleaning up user-entered data.

SELECT
    TRIM('   Maria Santos   ')                       AS trimmed_name,
    TRIM(BOTH '*' FROM '***FEATURED***')            AS no_stars,
    CONCAT('[', TRIM('  SKU  '), ']')               AS proof;
Output
trimmed_nameno_starsproof
Maria SantosFEATURED[SKU]
11

LTRIM(str)

Removes spaces from the left (beginning) of a string only. The right side stays the same.

SELECT
    CONCAT('[', LTRIM('   Laptop Stand   '), ']') AS left_trimmed;
Output
left_trimmed
[Laptop Stand ]
12

RTRIM(str)

Removes spaces from the right (end) of a string only. The left side stays the same.

SELECT
    CONCAT('[', RTRIM('   Laptop Stand   '), ']') AS right_trimmed;
Output
right_trimmed
[ Laptop Stand]
13

REPLACE(str, from_str, to_str)

Finds every occurrence of from_str inside the string and replaces it with to_str. It replaces ALL matches, not just the first one. Case-sensitive.

SELECT
    REPLACE('Category: Electronics, Status: Pending', 'Pending', 'Shipped')  AS updated,
    REPLACE('555-0101', '-', '')                                            AS no_dash,
    REPLACE('Laptop,Mouse,Monitor', ',', ' | ')                             AS formatted;
Output
updatedno_dashformatted
Category: Electronics, Status: Shipped5550101Laptop | Mouse | Monitor
14

REVERSE(str)

Flips the entire string backwards. The first character becomes the last and so on. Occasionally useful for certain data tricks or pattern matching from the end.

SELECT
    REVERSE('PRD-001')    AS reversed,
    REVERSE('12321')      AS palindrome_check;
Output
reversedpalindrome_check
100-DRP12321
15

REPEAT(str, count)

Repeats a string a specified number of times. Useful for creating visual separators, padding, or generating test data.

SELECT
    REPEAT('-', 30)          AS separator,
    REPEAT('AB', 3)           AS pattern,
    CONCAT('SALE', REPEAT('!', 3)) AS promo;
Output
separatorpatternpromo
------------------------------ABABABSALE!!!
16

LPAD(str, len, padstr)

Pads a string on the left with a fill character until it reaches the desired length. Great for creating fixed-width IDs like order numbers that need leading zeros.

SELECT
    LPAD('42',  6, '0')    AS order_num,
    LPAD('587', 6, '0')    AS padded_id,
    CONCAT('PRD-', LPAD('7', 5, '0')) AS full_sku;
Output
order_numpadded_idfull_sku
000042000587PRD-00007
17

RPAD(str, len, padstr)

Same idea as LPAD but pads on the right side. Makes the string reach a certain length by adding characters at the end.

SELECT
    RPAD('Laptop',    20, '.')   AS item_dotted,
    RPAD('Keyboard',  20, '.')   AS item2_dotted;
Output
item_dotteditem2_dotted
Laptop..............Keyboard............
18

INSTR(str, substr)

Finds the position of the first occurrence of a substring inside a string. Returns the position number (starting at 1). Returns 0 if not found. Note: the haystack comes first, then the needle.

SELECT
    INSTR('Wireless Bluetooth Mouse', 'Bluetooth')  AS bt_pos,
    INSTR('PRD-2024-00587', '-')                      AS first_dash,
    INSTR('Keyboard', 'xyz')                         AS not_found;
Output
bt_posfirst_dashnot_found
1040
19

LOCATE(substr, str [, start])

Also finds the position of a substring, but the arguments are in the opposite order from INSTR: needle first, then haystack. It also has an optional third argument to specify where to start searching from.

SELECT
    LOCATE('-', 'PRD-2024-00587')          AS first_dash,
    LOCATE('-', 'PRD-2024-00587', 5)       AS second_dash,
    LOCATE('Mouse', 'Wireless Bluetooth Mouse') AS mouse_pos;
Output
first_dashsecond_dashmouse_pos
4920
20

POSITION(substr IN str)

Yet another way to find a substring position. This one uses the SQL standard syntax with the IN keyword. It is functionally identical to LOCATE with two arguments. Some people prefer it because it reads like English.

SELECT
    POSITION('Bluetooth' IN 'Wireless Bluetooth Mouse')  AS bt_pos,
    POSITION('@' IN 'support@acmecorp.com')              AS at_pos;
Output
bt_posat_pos
108
21

FORMAT(number, decimals)

Formats a number with commas as thousand separators and rounds to the specified number of decimal places. The result is a string, not a number. Perfect for displaying invoice totals or large counts in a readable way.

SELECT
    FORMAT(1234567.891, 2)    AS revenue,
    FORMAT(50000, 0)          AS customer_count,
    FORMAT(0.12345, 3)        AS discount_rate;
Output
revenuecustomer_countdiscount_rate
1,234,567.8950,0000.123
22

REGEXP_LIKE(str, pattern)

Tests whether a string matches a regular expression pattern. Returns 1 (match) or 0 (no match). Regular expressions are powerful pattern-matching rules. For example, ^PRD-[0-9]+$ means "starts with PRD- followed by one or more digits and nothing else."

SELECT
    REGEXP_LIKE('PRD-2024-00587', '^PRD-[0-9]{4}-[0-9]+$')      AS valid_sku,
    REGEXP_LIKE('support@acmecorp.com', '[a-z]+@[a-z]+\\.[a-z]+') AS valid_email,
    REGEXP_LIKE('$49.99', '^\\$[0-9]+\\.[0-9]{2}$')              AS valid_price;
Output
valid_skuvalid_emailvalid_price
111
23

REGEXP_REPLACE(str, pattern, replacement)

Like REPLACE, but uses a regular expression pattern instead of a fixed string. This lets you replace complex patterns. For example, you can strip all non-digit characters from a phone number in one call.

SELECT
    REGEXP_REPLACE('(555) 010-1234', '[^0-9]', '')        AS digits_only,
    REGEXP_REPLACE('Laptop   Mouse    Monitor', '\\s+', ' ') AS single_spaces,
    REGEXP_REPLACE('Invoice v2.1 DRAFT', 'v[0-9.]+\\s*', '') AS no_version;
Output
digits_onlysingle_spacesno_version
5550101234Laptop Mouse MonitorInvoice DRAFT
24

REGEXP_INSTR(str, pattern)

Like INSTR but uses a regular expression. Returns the position of the first match for the pattern. Returns 0 if no match is found. Useful when the thing you are searching for is a pattern, not an exact string.

SELECT
    REGEXP_INSTR('Order Total: 45, Qty: 120', '[0-9]+')        AS first_number_at,
    REGEXP_INSTR('PRD-2024-00587', '[0-9]{5}')                    AS five_digit_at,
    REGEXP_INSTR('No numbers here!', '[0-9]')                    AS not_found;
Output
first_number_atfive_digit_atnot_found
14100
25

REGEXP_SUBSTR(str, pattern)

Extracts the first substring that matches a regular expression. Instead of telling you where the match is, it gives you the actual matched text. Returns NULL if nothing matches.

SELECT
    REGEXP_SUBSTR('Account ID: GL-90021', 'GL-[0-9]+')      AS account_id,
    REGEXP_SUBSTR('Ratio: 120/80 units', '[0-9]+/[0-9]+')  AS ratio,
    REGEXP_SUBSTR('Price $14.20 each', '[0-9]+\\.[0-9]+') AS value;
Output
account_idratiovalue
GL-90021120/8014.20