Data Pipeline and third-party database schemas
These are up-to-date representations of our raw data pipeline pixel, as converted to various third-party database schemas.
For an easy way to generate them, check out our parsely_raw_data open source project.
This is for Parse.ly Data Pipeline v2.3.0. Looking for a previous version? Look here.
Redshift Schema
Column | Example | Type |
---|---|---|
action | ‘pageview’ | VARCHAR(256) NOT NULL |
apikey | ‘mashable.com’ | VARCHAR(256) NOT NULL |
campaign_id | ‘facebook_campaign’ | VARCHAR(256) |
channel | ‘website’ | VARCHAR(256) |
display | True | BOOLEAN |
display_avail_height | 735 | INTEGER |
display_avail_width | 1280 | INTEGER |
display_pixel_depth | 24 | INTEGER |
display_total_height | 800 | INTEGER |
display_total_width | 1280 | INTEGER |
engaged_time_inc | | INTEGER | |
event_id | ‘0xe6508eda93d5598367b18…’ | VARCHAR(64) NOT NULL |
extra_data | | JSON | |
flags_is_amp | | BOOLEAN | |
ip_city | ‘Newark’ | VARCHAR(4096) |
ip_continent | ‘NA’ | VARCHAR(2) |
ip_country | ‘US’ | VARCHAR(2) |
ip_lat | 37.5147 | FLOAT |
ip_lon | -122.0423 | FLOAT |
ip_postal | ‘94560’ | VARCHAR(64) |
ip_subdivision | ‘CA’ | VARCHAR(3) |
ip_timezone | ‘America/Los_Angeles’ | VARCHAR(256) |
ip_market_name | ‘New York’ | VARCHAR(256) |
ip_market_nielsen | ‘501’ | VARCHAR(3) |
ip_market_doubleclick | ‘3’ | VARCHAR(3) |
metadata | True | BOOLEAN |
metadata_authors | [‘Laura Vitto’] | VARCHAR(MAX) |
metadata_canonical_url | ‘http://mashable.com/201…’ | VARCHAR(4096) |
metadata_custom_metadata | ‘{“site”:”Mashable”}’ | VARCHAR(4096) |
metadata_duration | | INTEGER | |
metadata_data_source | ‘crawl’ | VARCHAR(8) |
metadata_full_content_word_count | 174 | INTEGER |
metadata_image_url | ‘http://a.amz.mshcdn.com…’ | VARCHAR(4096) |
metadata_page_type | ‘post’ | VARCHAR(256) |
metadata_post_id | ‘http://mashable.com/201…’ | VARCHAR(4096) |
metadata_pub_date_tmsp | 1473275118000 | BIGINT |
metadata_save_date_tmsp | 1473275204000 | BIGINT |
metadata_section | ‘watercooler’ | VARCHAR(256) |
metadata_share_urls | | VARCHAR(MAX) | |
metadata_tags | [‘gadgets’, ‘iphone-7’] | VARCHAR(MAX) |
metadata_thumb_url | ‘https://images.parsely….’ | VARCHAR(4096) |
metadata_title | ‘Everyone has the same f…’ | VARCHAR(4096) |
metadata_urls | [‘http://mashable.com/2016/09/07/airpods-jokes/’] | VARCHAR(MAX) |
pageload_id | 17911340 | VARCHAR(256) |
pageview_id | 24977559 | VARCHAR(256) |
ref_category | ‘internal’ | VARCHAR(64) |
ref_clean | ‘http://mashable.com/’ | VARCHAR(4096) |
ref_domain | ‘mashable.com’ | VARCHAR(256) |
ref_fragment | ” | VARCHAR(4096) |
ref_netloc | ‘mashable.com’ | VARCHAR(256) |
ref_params | ” | VARCHAR(4096) |
ref_path | ‘/’ | VARCHAR(4096) |
ref_query | ” | VARCHAR(4096) |
ref_scheme | ‘http’ | VARCHAR(64) |
referrer | ‘http://mashable.com/’ | VARCHAR(4096) |
schema_version | ‘2.3.0’ | VARCHAR(64) |
session | True | BOOLEAN |
session_id | 6 | INTEGER |
session_initial_referrer | ‘http://mashable.com/’ | VARCHAR(4096) |
session_initial_url | ‘http://mashable.com/’ | VARCHAR(4096) |
session_last_session_timestamp | 1473271351611 | BIGINT |
session_timestamp | 1473277747806 | BIGINT |
slot | False | BOOLEAN |
sref_category | ‘internal’ | VARCHAR(64) |
sref_clean | ‘http://mashable.com/’ | VARCHAR(4096) |
sref_domain | ‘mashable.com’ | VARCHAR(256) |
sref_fragment | ” | VARCHAR(4096) |
sref_netloc | ‘mashable.com’ | VARCHAR(256) |
sref_params | ” | VARCHAR(4096) |
sref_path | ‘/’ | VARCHAR(4096) |
sref_query | ” | VARCHAR(4096) |
sref_scheme | ‘http’ | VARCHAR(64) |
surl_utm_campaign | ‘facebook_campaign’ | VARCHAR(256) |
surl_utm_medium | ‘partners’ | VARCHAR(64) |
surl_utm_source | ‘facebook’ | VARCHAR(64) |
surl_utm_term | ‘8098’ | VARCHAR(64) |
surl_utm_content | ‘sports’ | VARCHAR(256) |
surl_clean | ‘http://mashable.com/’ | VARCHAR(4096) |
surl_domain | ‘mashable.com’ | VARCHAR(256) |
surl_fragment | ” | VARCHAR(4096) |
surl_netloc | ‘mashable.com’ | VARCHAR(256) |
surl_params | ” | VARCHAR(4096) |
surl_path | ‘/’ | VARCHAR(4096) |
surl_query | ” | VARCHAR(4096) |
surl_scheme | ‘http’ | VARCHAR(64) |
timestamp_info | True | BOOLEAN |
timestamp_info_nginx_ms | 1473277850000 | BIGINT NOT NULL |
timestamp_info_override_ms | | BIGINT | |
timestamp_info_pixel_ms | 1473277850017 | BIGINT |
ts_action | ‘2016-09-07 19:50:50’ | TIMESTAMP |
ts_session_current | ‘2016-09-07 19:49:07’ | TIMESTAMP |
ts_session_last | ‘2016-09-07 18:02:31’ | TIMESTAMP |
ua_browser | ‘Safari’ | VARCHAR(4096) |
ua_browserversion | ‘9.1.2’ | VARCHAR(4096) |
ua_device | ‘Other’ | VARCHAR(4096) |
ua_devicebrand | None | VARCHAR(4096) |
ua_devicemodel | None | VARCHAR(4096) |
ua_devicetouchcapable | True | BOOLEAN |
ua_devicetype | ‘desktop’ | VARCHAR(4096) |
ua_os | ‘Mac OS X’ | VARCHAR(4096) |
ua_osversion | ‘10.10.5’ | VARCHAR(4096) |
url | ‘http://mashable.com/201…’ | VARCHAR(4096) |
url_clean | ‘http://mashable.com/201…’ | VARCHAR(4096) |
url_domain | ‘mashable.com’ | VARCHAR(256) |
url_fragment | ‘L.eZPflSGqq5’ | VARCHAR(4096) |
url_netloc | ‘mashable.com’ | VARCHAR(256) |
url_params | ” | VARCHAR(4096) |
url_path | ‘/2016/09/07/airpods-jok…’ | VARCHAR(4096) |
url_query | ” | VARCHAR(4096) |
url_scheme | ‘http’ | VARCHAR(64) |
utm_campaign | ‘facebook_campaign’ | VARCHAR(256) |
utm_medium | ‘partners’ | VARCHAR(64) |
utm_source | ‘facebook’ | VARCHAR(64) |
utm_term | ‘8098’ | VARCHAR(64) |
utm_content | ‘sports’ | VARCHAR(256) |
user_agent | ‘Mozilla/5.0 (Macintosh;…’ | VARCHAR(4096) |
version | 1 | INTEGER |
videostart_id | 99887766 | VARCHAR(256) |
visitor | True | BOOLEAN |
visitor_network_id | NULL | VARCHAR(128) |
visitor_site_id | ‘ab94fd31-a207-4010-8a25…’ | VARCHAR(128) NOT NULL |
Redshift DDL
-- Redshift table holding one row per raw data pipeline event.
-- Column order is significant for positional loads, so it is kept exactly
-- as published; only layout and comments differ.
-- NOTE(review): the Redshift schema table in this document also lists
-- extra_data, which has no column here -- confirm whether it should be added.
CREATE TABLE parsely.rawdata (
    -- Required event identification
    action                           VARCHAR(256) NOT NULL,
    apikey                           VARCHAR(256) NOT NULL,
    campaign_id                      VARCHAR(256),
    channel                          VARCHAR(256),

    -- display_* columns
    display                          BOOLEAN,
    display_avail_height             INTEGER,
    display_avail_width              INTEGER,
    display_pixel_depth              INTEGER,
    display_total_height             INTEGER,
    display_total_width              INTEGER,

    engaged_time_inc                 INTEGER,
    event_id                         VARCHAR(64) NOT NULL,
    flags_is_amp                     BOOLEAN,

    -- ip_* columns
    ip_city                          VARCHAR(4096),
    ip_continent                     VARCHAR(2),
    ip_country                       VARCHAR(2),
    ip_lat                           FLOAT,
    ip_lon                           FLOAT,
    ip_postal                        VARCHAR(64),
    ip_subdivision                   VARCHAR(3),
    ip_timezone                      VARCHAR(256),
    ip_market_name                   VARCHAR(256),
    ip_market_nielsen                VARCHAR(3),
    ip_market_doubleclick            VARCHAR(3),

    -- metadata_* columns; list-valued fields are stored as VARCHAR(MAX)
    metadata                         BOOLEAN,
    metadata_authors                 VARCHAR(MAX),
    metadata_canonical_url           VARCHAR(4096),
    metadata_custom_metadata         VARCHAR(4096),
    metadata_duration                INTEGER,
    metadata_data_source             VARCHAR(8),
    metadata_full_content_word_count INTEGER,
    metadata_image_url               VARCHAR(4096),
    metadata_page_type               VARCHAR(256),
    metadata_post_id                 VARCHAR(4096),
    -- BIGINT: example values such as 1473275118000 exceed the INTEGER range
    metadata_pub_date_tmsp           BIGINT,
    metadata_save_date_tmsp          BIGINT,
    metadata_section                 VARCHAR(256),
    metadata_share_urls              VARCHAR(MAX),
    metadata_tags                    VARCHAR(MAX),
    metadata_thumb_url               VARCHAR(4096),
    metadata_title                   VARCHAR(4096),
    metadata_urls                    VARCHAR(MAX),

    pageload_id                      VARCHAR(256),
    pageview_id                      VARCHAR(256),

    -- ref_* columns and the raw referrer
    ref_category                     VARCHAR(64),
    ref_clean                        VARCHAR(4096),
    ref_domain                       VARCHAR(256),
    ref_fragment                     VARCHAR(4096),
    ref_netloc                       VARCHAR(256),
    ref_params                       VARCHAR(4096),
    ref_path                         VARCHAR(4096),
    ref_query                        VARCHAR(4096),
    ref_scheme                       VARCHAR(64),
    referrer                         VARCHAR(4096),

    schema_version                   VARCHAR(64),

    -- session_* columns
    session                          BOOLEAN,
    session_id                       INTEGER,
    session_initial_referrer         VARCHAR(4096),
    session_initial_url              VARCHAR(4096),
    session_last_session_timestamp   BIGINT,
    session_timestamp                BIGINT,

    slot                             BOOLEAN,

    -- sref_* columns
    sref_category                    VARCHAR(64),
    sref_clean                       VARCHAR(4096),
    sref_domain                      VARCHAR(256),
    sref_fragment                    VARCHAR(4096),
    sref_netloc                      VARCHAR(256),
    sref_params                      VARCHAR(4096),
    sref_path                        VARCHAR(4096),
    sref_query                       VARCHAR(4096),
    sref_scheme                      VARCHAR(64),

    -- surl_* columns
    surl_utm_campaign                VARCHAR(256),
    surl_utm_medium                  VARCHAR(64),
    surl_utm_source                  VARCHAR(64),
    surl_utm_term                    VARCHAR(64),
    surl_utm_content                 VARCHAR(256),
    surl_clean                       VARCHAR(4096),
    surl_domain                      VARCHAR(256),
    surl_fragment                    VARCHAR(4096),
    surl_netloc                      VARCHAR(256),
    surl_params                      VARCHAR(4096),
    surl_path                        VARCHAR(4096),
    surl_query                       VARCHAR(4096),
    surl_scheme                      VARCHAR(64),

    -- timestamp_info_* (BIGINT) and ts_* (TIMESTAMP) columns
    timestamp_info                   BOOLEAN,
    timestamp_info_nginx_ms          BIGINT NOT NULL,
    timestamp_info_override_ms       BIGINT,
    timestamp_info_pixel_ms          BIGINT,
    ts_action                        TIMESTAMP,
    ts_session_current               TIMESTAMP,
    ts_session_last                  TIMESTAMP,

    -- ua_* columns
    ua_browser                       VARCHAR(4096),
    ua_browserversion                VARCHAR(4096),
    ua_device                        VARCHAR(4096),
    ua_devicebrand                   VARCHAR(4096),
    ua_devicemodel                   VARCHAR(4096),
    ua_devicetouchcapable            BOOLEAN,
    ua_devicetype                    VARCHAR(4096),
    ua_os                            VARCHAR(4096),
    ua_osversion                     VARCHAR(4096),

    -- url and url_* columns
    url                              VARCHAR(4096),
    url_clean                        VARCHAR(4096),
    url_domain                       VARCHAR(256),
    url_fragment                     VARCHAR(4096),
    url_netloc                       VARCHAR(256),
    url_params                       VARCHAR(4096),
    url_path                         VARCHAR(4096),
    url_query                        VARCHAR(4096),
    url_scheme                       VARCHAR(64),

    -- utm_* columns
    utm_campaign                     VARCHAR(256),
    utm_medium                       VARCHAR(64),
    utm_source                       VARCHAR(64),
    utm_term                         VARCHAR(64),
    utm_content                      VARCHAR(256),

    user_agent                       VARCHAR(4096),
    version                          INTEGER,
    videostart_id                    VARCHAR(256),

    -- visitor and visitor_* columns
    visitor                          BOOLEAN,
    visitor_network_id               VARCHAR(128),
    visitor_site_id                  VARCHAR(128) NOT NULL
);
BigQuery Schema
Column | Example | Type |
---|---|---|
action | ‘pageview’ | STRING |
apikey | ‘mashable.com’ | STRING |
campaign_id | ‘facebook_campaign’ | STRING |
channel | ‘website’ | STRING |
display | True | BOOLEAN |
display_avail_height | 735 | INTEGER |
display_avail_width | 1280 | INTEGER |
display_pixel_depth | 24 | INTEGER |
display_total_height | 800 | INTEGER |
display_total_width | 1280 | INTEGER |
engaged_time_inc | | INTEGER | |
event_id | ‘0xe6508eda93d5598367b18…’ | STRING |
extra_data | | JSON | |
flags_is_amp | | BOOLEAN | |
ip_city | ‘Newark’ | STRING |
ip_continent | ‘NA’ | STRING |
ip_country | ‘US’ | STRING |
ip_lat | 37.5147 | FLOAT |
ip_lon | -122.0423 | FLOAT |
ip_postal | ‘94560’ | STRING |
ip_subdivision | ‘CA’ | STRING |
ip_timezone | ‘America/Los_Angeles’ | STRING |
ip_market_name | ‘New York’ | STRING |
ip_market_nielsen | ‘501’ | STRING |
ip_market_doubleclick | ‘3’ | STRING |
metadata | True | BOOLEAN |
metadata_authors | [‘Laura Vitto’] | STRING (REPEATED) |
metadata_canonical_url | ‘http://mashable.com/201…’ | STRING |
metadata_custom_metadata | ‘{“site”:”Mashable”}’ | STRING |
metadata_duration | | INTEGER | |
metadata_data_source | ‘crawl’ | STRING |
metadata_full_content_word_count | 174 | INTEGER |
metadata_image_url | ‘http://a.amz.mshcdn.com…’ | STRING |
metadata_page_type | ‘post’ | STRING |
metadata_post_id | ‘http://mashable.com/201…’ | STRING |
metadata_pub_date_tmsp | 1473275118000 | INTEGER |
metadata_save_date_tmsp | 1473275204000 | INTEGER |
metadata_section | ‘watercooler’ | STRING |
metadata_share_urls | | STRING (REPEATED) | |
metadata_tags | [‘gadgets’, ‘iphone-7’] | STRING (REPEATED) |
metadata_thumb_url | ‘https://images.parsely….’ | STRING |
metadata_title | ‘Everyone has the same f…’ | STRING |
metadata_urls | [‘http://mashable.com/2016/09/07/airpods-jokes/’] | STRING (REPEATED) |
pageload_id | 11223344 | STRING |
pageview_id | 55667788 | STRING |
ref_category | ‘internal’ | STRING |
ref_clean | ‘http://mashable.com/’ | STRING |
ref_domain | ‘mashable.com’ | STRING |
ref_fragment | ” | STRING |
ref_netloc | ‘mashable.com’ | STRING |
ref_params | ” | STRING |
ref_path | ‘/’ | STRING |
ref_query | ” | STRING |
ref_scheme | ‘http’ | STRING |
referrer | ‘http://mashable.com/’ | STRING |
schema_version | ‘2.3.0’ | STRING |
session | True | BOOLEAN |
session_id | 6 | INTEGER |
session_initial_referrer | ‘http://mashable.com/’ | STRING |
session_initial_url | ‘http://mashable.com/’ | STRING |
session_last_session_timestamp | 1473271351611 | INTEGER |
session_timestamp | 1473277747806 | INTEGER |
slot | False | BOOLEAN |
sref_category | ‘internal’ | STRING |
sref_clean | ‘http://mashable.com/’ | STRING |
sref_domain | ‘mashable.com’ | STRING |
sref_fragment | ” | STRING |
sref_netloc | ‘mashable.com’ | STRING |
sref_params | ” | STRING |
sref_path | ‘/’ | STRING |
sref_query | ” | STRING |
sref_scheme | ‘http’ | STRING |
surl_utm_campaign | ‘facebook_campaign’ | STRING |
surl_utm_medium | ‘partners’ | STRING |
surl_utm_source | ‘facebook’ | STRING |
surl_utm_term | ‘8098’ | STRING |
surl_utm_content | ‘sports’ | STRING |
surl_clean | ‘http://mashable.com/’ | STRING |
surl_domain | ‘mashable.com’ | STRING |
surl_fragment | ” | STRING |
surl_netloc | ‘mashable.com’ | STRING |
surl_params | ” | STRING |
surl_path | ‘/’ | STRING |
surl_query | ” | STRING |
surl_scheme | ‘http’ | STRING |
timestamp_info | True | BOOLEAN |
timestamp_info_nginx_ms | 1473277850000 | INTEGER |
timestamp_info_override_ms | | INTEGER | |
timestamp_info_pixel_ms | 1473277850017 | INTEGER |
ts_action | ‘2016-09-07 19:50:50’ | STRING |
ts_session_current | ‘2016-09-07 19:49:07’ | STRING |
ts_session_last | ‘2016-09-07 18:02:31’ | STRING |
ua_browser | ‘Safari’ | STRING |
ua_browserversion | ‘9.1.2’ | STRING |
ua_device | ‘Other’ | STRING |
ua_devicebrand | None | STRING |
ua_devicemodel | None | STRING |
ua_devicetouchcapable | True | BOOLEAN |
ua_devicetype | ‘desktop’ | STRING |
ua_os | ‘Mac OS X’ | STRING |
ua_osversion | ‘10.10.5’ | STRING |
url | ‘http://mashable.com/201…’ | STRING |
url_clean | ‘http://mashable.com/201…’ | STRING |
url_domain | ‘mashable.com’ | STRING |
url_fragment | ‘L.eZPflSGqq5’ | STRING |
url_netloc | ‘mashable.com’ | STRING |
url_params | ” | STRING |
url_path | ‘/2016/09/07/airpods-jok…’ | STRING |
url_query | ” | STRING |
url_scheme | ‘http’ | STRING |
utm_campaign | ‘facebook_campaign’ | STRING |
utm_medium | ‘partners’ | STRING |
utm_source | ‘facebook’ | STRING |
utm_term | ‘8098’ | STRING |
utm_content | ‘sports’ | STRING |
user_agent | ‘Mozilla/5.0 (Macintosh;…’ | STRING |
version | 1 | INTEGER |
videostart_id | 99887766 | STRING |
visitor | True | BOOLEAN |
visitor_network_id | NULL | STRING |
visitor_site_id | ‘ab94fd31-a207-4010-8a25…’ | STRING |
BigQuery DDL
{"name": "action", "type": "STRING", "mode": "NULLABLE"}
{"name": "apikey", "type": "STRING", "mode": "NULLABLE"}
{"name": "campaign_id", "type": "STRING", "mode": "NULLABLE"}
{"name": "channel", "type": "STRING", "mode": "NULLABLE"}
{"name": "display", "type": "BOOLEAN", "mode": "NULLABLE"}
{"name": "display_avail_height", "type": "INTEGER", "mode": "NULLABLE"}
{"name": "display_avail_width", "type": "INTEGER", "mode": "NULLABLE"}
{"name": "display_pixel_depth", "type": "INTEGER", "mode": "NULLABLE"}
{"name": "display_total_height", "type": "INTEGER", "mode": "NULLABLE"}
{"name": "display_total_width", "type": "INTEGER", "mode": "NULLABLE"}
{"name": "engaged_time_inc", "type": "INTEGER", "mode": "NULLABLE"}
{"name": "event_id", "type": "STRING", "mode": "NULLABLE"}
{"name": "flags_is_amp", "type": "BOOLEAN", "mode": "NULLABLE"}
{"name": "ip_city", "type": "STRING", "mode": "NULLABLE"}
{"name": "ip_continent", "type": "STRING", "mode": "NULLABLE"}
{"name": "ip_country", "type": "STRING", "mode": "NULLABLE"}
{"name": "ip_lat", "type": "FLOAT", "mode": "NULLABLE"}
{"name": "ip_lon", "type": "FLOAT", "mode": "NULLABLE"}
{"name": "ip_postal", "type": "STRING", "mode": "NULLABLE"}
{"name": "ip_subdivision", "type": "STRING", "mode": "NULLABLE"}
{"name": "ip_timezone", "type": "STRING", "mode": "NULLABLE"}
{"name": "ip_market_name", "type": "STRING", "mode": "NULLABLE"}
{"name": "ip_market_nielsen", "type": "STRING", "mode": "NULLABLE"}
{"name": "ip_market_doubleclick", "type": "STRING", "mode": "NULLABLE"}
{"name": "metadata", "type": "BOOLEAN", "mode": "NULLABLE"}
{"name": "metadata_authors", "type": "STRING", "mode": "REPEATED"}
{"name": "metadata_canonical_url", "type": "STRING", "mode": "NULLABLE"}
{"name": "metadata_custom_metadata", "type": "STRING", "mode": "NULLABLE"}
{"name": "metadata_duration", "type": "INTEGER", "mode": "NULLABLE"}
{"name": "metadata_data_source", "type": "STRING", "mode": "NULLABLE"}
{"name": "metadata_full_content_word_count", "type": "INTEGER", "mode": "NULLABLE"}
{"name": "metadata_image_url", "type": "STRING", "mode": "NULLABLE"}
{"name": "metadata_page_type", "type": "STRING", "mode": "NULLABLE"}
{"name": "metadata_post_id", "type": "STRING", "mode": "NULLABLE"}
{"name": "metadata_pub_date_tmsp", "type": "INTEGER", "mode": "NULLABLE"}
{"name": "metadata_save_date_tmsp", "type": "INTEGER", "mode": "NULLABLE"}
{"name": "metadata_section", "type": "STRING", "mode": "NULLABLE"}
{"name": "metadata_share_urls", "type": "STRING", "mode": "REPEATED"}
{"name": "metadata_tags", "type": "STRING", "mode": "REPEATED"}
{"name": "metadata_thumb_url", "type": "STRING", "mode": "NULLABLE"}
{"name": "metadata_title", "type": "STRING", "mode": "NULLABLE"}
{"name": "metadata_urls", "type": "STRING", "mode": "REPEATED"}
{"name": "pageload_id", "type": "STRING", "mode": "NULLABLE"}
{"name": "pageview_id", "type": "STRING", "mode": "NULLABLE"}
{"name": "ref_category", "type": "STRING", "mode": "NULLABLE"}
{"name": "ref_clean", "type": "STRING", "mode": "NULLABLE"}
{"name": "ref_domain", "type": "STRING", "mode": "NULLABLE"}
{"name": "ref_fragment", "type": "STRING", "mode": "NULLABLE"}
{"name": "ref_netloc", "type": "STRING", "mode": "NULLABLE"}
{"name": "ref_params", "type": "STRING", "mode": "NULLABLE"}
{"name": "ref_path", "type": "STRING", "mode": "NULLABLE"}
{"name": "ref_query", "type": "STRING", "mode": "NULLABLE"}
{"name": "ref_scheme", "type": "STRING", "mode": "NULLABLE"}
{"name": "referrer", "type": "STRING", "mode": "NULLABLE"}
{"name": "schema_version", "type": "STRING", "mode": "NULLABLE"}
{"name": "session", "type": "BOOLEAN", "mode": "NULLABLE"}
{"name": "session_id", "type": "INTEGER", "mode": "NULLABLE"}
{"name": "session_initial_referrer", "type": "STRING", "mode": "NULLABLE"}
{"name": "session_initial_url", "type": "STRING", "mode": "NULLABLE"}
{"name": "session_last_session_timestamp", "type": "INTEGER", "mode": "NULLABLE"}
{"name": "session_timestamp", "type": "INTEGER", "mode": "NULLABLE"}
{"name": "slot", "type": "BOOLEAN", "mode": "NULLABLE"}
{"name": "sref_category", "type": "STRING", "mode": "NULLABLE"}
{"name": "sref_clean", "type": "STRING", "mode": "NULLABLE"}
{"name": "sref_domain", "type": "STRING", "mode": "NULLABLE"}
{"name": "sref_fragment", "type": "STRING", "mode": "NULLABLE"}
{"name": "sref_netloc", "type": "STRING", "mode": "NULLABLE"}
{"name": "sref_params", "type": "STRING", "mode": "NULLABLE"}
{"name": "sref_path", "type": "STRING", "mode": "NULLABLE"}
{"name": "sref_query", "type": "STRING", "mode": "NULLABLE"}
{"name": "sref_scheme", "type": "STRING", "mode": "NULLABLE"}
{"name": "surl_utm_campaign", "type": "STRING", "mode": "NULLABLE"}
{"name": "surl_utm_medium", "type": "STRING", "mode": "NULLABLE"}
{"name": "surl_utm_source", "type": "STRING", "mode": "NULLABLE"}
{"name": "surl_utm_term", "type": "STRING", "mode": "NULLABLE"}
{"name": "surl_utm_content", "type": "STRING", "mode": "NULLABLE"}
{"name": "surl_clean", "type": "STRING", "mode": "NULLABLE"}
{"name": "surl_domain", "type": "STRING", "mode": "NULLABLE"}
{"name": "surl_fragment", "type": "STRING", "mode": "NULLABLE"}
{"name": "surl_netloc", "type": "STRING", "mode": "NULLABLE"}
{"name": "surl_params", "type": "STRING", "mode": "NULLABLE"}
{"name": "surl_path", "type": "STRING", "mode": "NULLABLE"}
{"name": "surl_query", "type": "STRING", "mode": "NULLABLE"}
{"name": "surl_scheme", "type": "STRING", "mode": "NULLABLE"}
{"name": "timestamp_info", "type": "BOOLEAN", "mode": "NULLABLE"}
{"name": "timestamp_info_nginx_ms", "type": "INTEGER", "mode": "NULLABLE"}
{"name": "timestamp_info_override_ms", "type": "INTEGER", "mode": "NULLABLE"}
{"name": "timestamp_info_pixel_ms", "type": "INTEGER", "mode": "NULLABLE"}
{"name": "ts_action", "type": "STRING", "mode": "NULLABLE"}
{"name": "ts_session_current", "type": "STRING", "mode": "NULLABLE"}
{"name": "ts_session_last", "type": "STRING", "mode": "NULLABLE"}
{"name": "ua_browser", "type": "STRING", "mode": "NULLABLE"}
{"name": "ua_browserversion", "type": "STRING", "mode": "NULLABLE"}
{"name": "ua_device", "type": "STRING", "mode": "NULLABLE"}
{"name": "ua_devicebrand", "type": "STRING", "mode": "NULLABLE"}
{"name": "ua_devicemodel", "type": "STRING", "mode": "NULLABLE"}
{"name": "ua_devicetouchcapable", "type": "BOOLEAN", "mode": "NULLABLE"}
{"name": "ua_devicetype", "type": "STRING", "mode": "NULLABLE"}
{"name": "ua_os", "type": "STRING", "mode": "NULLABLE"}
{"name": "ua_osversion", "type": "STRING", "mode": "NULLABLE"}
{"name": "url", "type": "STRING", "mode": "NULLABLE"}
{"name": "url_clean", "type": "STRING", "mode": "NULLABLE"}
{"name": "url_domain", "type": "STRING", "mode": "NULLABLE"}
{"name": "url_fragment", "type": "STRING", "mode": "NULLABLE"}
{"name": "url_netloc", "type": "STRING", "mode": "NULLABLE"}
{"name": "url_params", "type": "STRING", "mode": "NULLABLE"}
{"name": "url_path", "type": "STRING", "mode": "NULLABLE"}
{"name": "url_query", "type": "STRING", "mode": "NULLABLE"}
{"name": "url_scheme", "type": "STRING", "mode": "NULLABLE"}
{"name": "utm_campaign", "type": "STRING", "mode": "NULLABLE"}
{"name": "utm_medium", "type": "STRING", "mode": "NULLABLE"}
{"name": "utm_source", "type": "STRING", "mode": "NULLABLE"}
{"name": "utm_term", "type": "STRING", "mode": "NULLABLE"}
{"name": "utm_content", "type": "STRING", "mode": "NULLABLE"}
{"name": "user_agent", "type": "STRING", "mode": "NULLABLE"}
{"name": "version", "type": "INTEGER", "mode": "NULLABLE"}
{"name": "videostart_id", "type": "STRING", "mode": "NULLABLE"}
{"name": "visitor", "type": "BOOLEAN", "mode": "NULLABLE"}
{"name": "visitor_network_id", "type": "STRING", "mode": "NULLABLE"}
{"name": "visitor_site_id", "type": "STRING", "mode": "NULLABLE"}
Athena Schema
Column | Example | Type |
---|---|---|
action | ‘pageview’ | STRING |
apikey | ‘mashable.com’ | STRING |
campaign_id | ‘facebook_campaign’ | STRING |
channel | ‘website’ | STRING |
display | True | BOOLEAN |
display_avail_height | 735 | INT |
display_avail_width | 1280 | INT |
display_pixel_depth | 24 | INT |
display_total_height | 800 | INT |
display_total_width | 1280 | INT |
engaged_time_inc | | INT | |
event_id | ‘0xe6508eda93d5598367b18…’ | STRING |
extra_data | | STRING | |
flags_is_amp | | BOOLEAN | |
ip_city | ‘Newark’ | STRING |
ip_continent | ‘NA’ | STRING |
ip_country | ‘US’ | STRING |
ip_lat | 37.5147 | DOUBLE |
ip_lon | -122.0423 | DOUBLE |
ip_postal | ‘94560’ | STRING |
ip_subdivision | ‘CA’ | STRING |
ip_timezone | ‘America/Los_Angeles’ | STRING |
ip_market_name | ‘New York’ | STRING |
ip_market_nielsen | ‘501’ | STRING |
ip_market_doubleclick | ‘3’ | STRING |
metadata | True | BOOLEAN |
metadata_authors | [‘Laura Vitto’] | ARRAY (STRING) |
metadata_canonical_url | ‘http://mashable.com/201…’ | STRING |
metadata_custom_metadata | ‘{“site”:”Mashable”}’ | STRING |
metadata_duration | | INT | |
metadata_data_source | ‘crawl’ | STRING |
metadata_full_content_word_count | 174 | INT |
metadata_image_url | ‘http://a.amz.mshcdn.com…’ | STRING |
metadata_page_type | ‘post’ | STRING |
metadata_post_id | ‘http://mashable.com/201…’ | STRING |
metadata_pub_date_tmsp | 1473275118000 | BIGINT |
metadata_save_date_tmsp | 1473275204000 | BIGINT |
metadata_section | ‘watercooler’ | STRING |
metadata_share_urls | | ARRAY (STRING) | |
metadata_tags | [‘gadgets’, ‘iphone-7’] | ARRAY (STRING) |
metadata_thumb_url | ‘https://images.parsely….’ | STRING |
metadata_title | ‘Everyone has the same f…’ | STRING |
metadata_urls | [‘http://mashable.com/2016/09/07/airpods-jokes/’] | ARRAY (STRING) |
pageload_id | 11223344 | STRING |
pageview_id | 55667788 | STRING |
ref_category | ‘internal’ | STRING |
ref_clean | ‘http://mashable.com/’ | STRING |
ref_domain | ‘mashable.com’ | STRING |
ref_fragment | ” | STRING |
ref_netloc | ‘mashable.com’ | STRING |
ref_params | ” | STRING |
ref_path | ‘/’ | STRING |
ref_query | ” | STRING |
ref_scheme | ‘http’ | STRING |
referrer | ‘http://mashable.com/’ | STRING |
schema_version | ‘2.3.0’ | STRING |
session | True | BOOLEAN |
session_id | 6 | INT |
session_initial_referrer | ‘http://mashable.com/’ | STRING |
session_initial_url | ‘http://mashable.com/’ | STRING |
session_last_session_timestamp | 1473271351611 | BIGINT |
session_timestamp | 1473277747806 | BIGINT |
slot | False | BOOLEAN |
sref_category | ‘internal’ | STRING |
sref_clean | ‘http://mashable.com/’ | STRING |
sref_domain | ‘mashable.com’ | STRING |
sref_fragment | ” | STRING |
sref_netloc | ‘mashable.com’ | STRING |
sref_params | ” | STRING |
sref_path | ‘/’ | STRING |
sref_query | ” | STRING |
sref_scheme | ‘http’ | STRING |
surl_utm_campaign | ‘facebook_campaign’ | STRING |
surl_utm_medium | ‘partners’ | STRING |
surl_utm_source | ‘facebook’ | STRING |
surl_utm_term | ‘8098’ | STRING |
surl_utm_content | ‘sports’ | STRING |
surl_clean | ‘http://mashable.com/’ | STRING |
surl_domain | ‘mashable.com’ | STRING |
surl_fragment | ” | STRING |
surl_netloc | ‘mashable.com’ | STRING |
surl_params | ” | STRING |
surl_path | ‘/’ | STRING |
surl_query | ” | STRING |
surl_scheme | ‘http’ | STRING |
timestamp_info | True | BOOLEAN |
timestamp_info_nginx_ms | 1473277850000 | BIGINT |
timestamp_info_override_ms | | BIGINT | |
timestamp_info_pixel_ms | 1473277850017 | BIGINT |
ts_action | ‘2016-09-07 19:50:50’ | STRING |
ts_session_current | ‘2016-09-07 19:49:07’ | STRING |
ts_session_last | ‘2016-09-07 18:02:31’ | STRING |
ua_browser | ‘Safari’ | STRING |
ua_browserversion | ‘9.1.2’ | STRING |
ua_device | ‘Other’ | STRING |
ua_devicebrand | None | STRING |
ua_devicemodel | None | STRING |
ua_devicetouchcapable | True | BOOLEAN |
ua_devicetype | ‘desktop’ | STRING |
ua_os | ‘Mac OS X’ | STRING |
ua_osversion | ‘10.10.5’ | STRING |
url | ‘http://mashable.com/201…’ | STRING |
url_clean | ‘http://mashable.com/201…’ | STRING |
url_domain | ‘mashable.com’ | STRING |
url_fragment | ‘L.eZPflSGqq5’ | STRING |
url_netloc | ‘mashable.com’ | STRING |
url_params | ” | STRING |
url_path | ‘/2016/09/07/airpods-jok…’ | STRING |
url_query | ” | STRING |
url_scheme | ‘http’ | STRING |
utm_campaign | ‘facebook_campaign’ | STRING |
utm_medium | ‘partners’ | STRING |
utm_source | ‘facebook’ | STRING |
utm_term | ‘8098’ | STRING |
utm_content | ‘sports’ | STRING |
user_agent | ‘Mozilla/5.0 (Macintosh;…’ | STRING |
version | 1 | INT |
videostart_id | 99887766 | STRING |
visitor | True | BOOLEAN |
visitor_network_id | NULL | STRING |
visitor_site_id | ‘ab94fd31-a207-4010-8a25…’ | STRING |
Athena DDL
-- Athena external table over the gzip-compressed JSON event files stored
-- under LOCATION, read through the OpenX JSON SerDe and partitioned by the
-- string columns `year` and `month`.
--
-- Column types follow the Athena schema table in this document:
--   * flags_is_amp is boolean, metadata_duration is int and
--     timestamp_info_override_ms is bigint (like the other *_ms columns),
--     matching the Redshift DDL in this document rather than a generic string.
--   * surl_utm_term and utm_term are declared, as both appear in the schema
--     table alongside the other utm columns.
-- NOTE(review): visitor_ip is not listed in the schema table; it is kept here
-- so existing queries keep working -- confirm it is still delivered.
-- NOTE(review): 'transient_lastDdlTime' looks like an auto-generated table
-- property carried over from an existing table -- confirm it is needed.
CREATE EXTERNAL TABLE `table_name`(
  `action` string,
  `apikey` string,
  `campaign_id` string,
  `channel` string,
  `display_avail_height` int,
  `display_avail_width` int,
  `display_pixel_depth` int,
  `display_total_height` int,
  `display_total_width` int,
  `display` boolean,
  `engaged_time_inc` int,
  `event_id` string,
  `extra_data` string,
  `flags_is_amp` boolean,
  `ip_city` string,
  `ip_continent` string,
  `ip_country` string,
  `ip_lat` double,
  `ip_lon` double,
  `ip_market_doubleclick` string,
  `ip_market_name` string,
  `ip_market_nielsen` string,
  `ip_postal` string,
  `ip_subdivision` string,
  `ip_timezone` string,
  `metadata_authors` array<string>,
  `metadata_canonical_url` string,
  `metadata_custom_metadata` string,
  `metadata_data_source` string,
  `metadata_duration` int,
  `metadata_full_content_word_count` int,
  `metadata_image_url` string,
  `metadata_page_type` string,
  `metadata_post_id` string,
  `metadata_pub_date_tmsp` bigint,
  `metadata_save_date_tmsp` bigint,
  `metadata_section` string,
  `metadata_share_urls` array<string>,
  `metadata_tags` array<string>,
  `metadata_thumb_url` string,
  `metadata_title` string,
  `metadata_urls` array<string>,
  `metadata` boolean,
  `pageload_id` string,
  `pageview_id` string,
  `ref_category` string,
  `ref_clean` string,
  `ref_domain` string,
  `ref_fragment` string,
  `ref_netloc` string,
  `ref_params` string,
  `ref_path` string,
  `ref_query` string,
  `ref_scheme` string,
  `referrer` string,
  `schema_version` string,
  `session_id` int,
  `session_initial_referrer` string,
  `session_initial_url` string,
  `session_last_session_timestamp` bigint,
  `session_timestamp` bigint,
  `session` boolean,
  `slot` boolean,
  `sref_category` string,
  `sref_clean` string,
  `sref_domain` string,
  `sref_fragment` string,
  `sref_netloc` string,
  `sref_params` string,
  `sref_path` string,
  `sref_query` string,
  `sref_scheme` string,
  `surl_clean` string,
  `surl_domain` string,
  `surl_fragment` string,
  `surl_netloc` string,
  `surl_params` string,
  `surl_path` string,
  `surl_query` string,
  `surl_scheme` string,
  `surl_utm_campaign` string,
  `surl_utm_content` string,
  `surl_utm_medium` string,
  `surl_utm_source` string,
  `surl_utm_term` string,
  `timestamp_info_nginx_ms` bigint,
  `timestamp_info_override_ms` bigint,
  `timestamp_info_pixel_ms` bigint,
  `timestamp_info` boolean,
  `ts_action` string,
  `ts_session_current` string,
  `ts_session_last` string,
  `ua_browser` string,
  `ua_browserversion` string,
  `ua_device` string,
  `ua_devicebrand` string,
  `ua_devicemodel` string,
  `ua_devicetype` string,
  `ua_os` string,
  `ua_osversion` string,
  `url_clean` string,
  `url_domain` string,
  `url_fragment` string,
  `url_netloc` string,
  `url_params` string,
  `url_path` string,
  `url_query` string,
  `url_scheme` string,
  `url` string,
  `user_agent` string,
  `utm_campaign` string,
  `utm_content` string,
  `utm_medium` string,
  `utm_source` string,
  `utm_term` string,
  `version` int,
  `videostart_id` string,
  `visitor_ip` string,
  `visitor_network_id` string,
  `visitor_site_id` string,
  `visitor` boolean,
  `ua_devicetouchcapable` boolean)
PARTITIONED BY (
  `year` string,
  `month` string)
ROW FORMAT SERDE
  'org.openx.data.jsonserde.JsonSerDe'
LOCATION
  's3://parsely-dw-bucket-name/events'
TBLPROPERTIES (
  'classification'='json',
  'compressionType'='gzip',
  'transient_lastDdlTime'='1570652964',
  'typeOfData'='file');
Last updated: October 23, 2024