| Title: | Parse 'Tableau' Workbooks into Functional Data |
|---|---|
| Description: | High-performance parsing of 'Tableau' workbook files into tidy data frames and dependency graphs for other visualization tools like R 'Shiny' or 'Power BI' replication. |
| Authors: | George Arthur [aut, cre] (ORCID: <https://orcid.org/0000-0002-1975-1459>) |
| Maintainer: | George Arthur <[email protected]> |
| License: | MIT + file LICENSE |
| Version: | 0.4.0 |
| Built: | 2026-06-10 07:06:23 UTC |
| Source: | https://github.com/prigasg/twbparser |
Creates a directed graph where edges point from input fields used in a
formula to the calculated output field. Tokens are extracted from bracketed
references like [Table].[Field] or [Field].
build_dependency_graph(fields_df)
fields_df |
A data frame with at least columns name and formula. |
An igraph directed graph where vertices are field names and edges
represent dependencies (input → output).
fields <- tibble::tibble(
  name = c("X_plus_Y", "Z"),
  formula = c("[X] + [Y]", "[X_plus_Y] * 2")
)
g <- build_dependency_graph(fields)
Finds columns that contain <calculation> nodes and returns metadata and
formulas, with a heuristic flag for table calculations.
extract_calculated_fields(xml_doc, include_parameters = FALSE)
xml_doc |
An xml2 document for a Tableau .twb. |
include_parameters |
Logical. If TRUE, include items from the "Parameters" datasource or columns with @param-domain-type. Default FALSE. |
A tibble with columns:
Datasource name.
User-visible caption or cleaned internal name.
Internal Tableau name (often bracketed).
Tableau datatype.
Tableau role.
Calculation formula string.
Tableau calc class (often "tableau").
Heuristic flag for table calcs (e.g., WINDOW_, LOOKUP).
Raw table reference.
Cleaned table name.
twb <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser")
stopifnot(nzchar(twb), file.exists(twb))
xml <- xml2::read_xml(twb)
calcs <- extract_calculated_fields(xml)
head(calcs)
Scans top-level <datasource> nodes (excluding view-specific references) and
returns fields with raw names/captions, cleaned table/field names, and basic
metadata.
extract_columns_with_table_source(xml_doc)
xml_doc |
An xml2 document for a Tableau .twb. |
A tibble with columns:
Datasource name.
Raw column name (may include brackets/qualifiers).
Column caption if present.
Tableau datatype.
Tableau role.
Semantic role if present.
Raw table reference.
Cleaned table name (no brackets/suffix).
Cleaned field name.
twb <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser")
stopifnot(nzchar(twb), file.exists(twb))
xml <- xml2::read_xml(twb)
fields <- extract_columns_with_table_source(xml)
Gathers runtime tables (from the object graph), merges in named-connection metadata (class, caption, targets), and augments with top-level datasource definitions (field counts, connection type, location). Also returns a filtered table of parameter datasources.
extract_datasource_details(xml_doc)
xml_doc |
An xml2 document for a Tableau .twb. |
A named list with:
Tibble of datasources joined with connection metadata.
Tibble of parameter datasources (if present).
Same as data_sources (placeholder for future variants).
# Preferred: from a tiny .twb
twb <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser")
if (nzchar(twb) && file.exists(twb)) {
  xml <- xml2::read_xml(twb)
  res <- extract_datasource_details(xml)
  head(res$data_sources)
}
# Alternative: from a tiny .twbx (guarded)
twbx <- system.file("extdata", "test_for_zip.twbx", package = "twbparser")
if (nzchar(twbx) && file.exists(twbx)) {
  members <- twbx_list(twbx)
  twb_rows <- members$name[grepl("\\.twb$", members$name)]
  if (length(twb_rows) > 0L && !is.na(twb_rows[1])) {
    twb_member <- twb_rows[1]
    xml <- xml2::read_xml(utils::unzip(twbx, twb_member, exdir = tempdir()))
    res <- extract_datasource_details(xml)
    head(res$data_sources)
  }
}
<relation type="join"> nodes
Handles both column-based clauses (<clause><column/></clause>) and
expression-based predicates (<expression op=...>) found in TWB XML.
extract_joins(xml_doc)
xml_doc |
An xml2 document for a Tableau .twb. |
A tibble with columns:
Join kind (e.g., inner, left), if available.
Left table name (cleaned).
Left field name.
Predicate operator (defaults to "=" when missing).
Right table name (cleaned).
Right field name.
twb <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser")
stopifnot(nzchar(twb), file.exists(twb))
xml <- xml2::read_xml(twb)
extract_joins(xml)
<named-connection> entries from a TWB
Rich, safe extraction of <named-connection> nodes and their <connection>
attributes into a tidy tibble.
extract_named_connections(xml_doc)
xml_doc |
An xml2 document for a Tableau .twb. |
Tibble with columns like connection_id, connection_caption,
connection_class, connection_target, dbname, schema, warehouse,
region, filename, and location_named.
# Preferred: read from a tiny '.twb'
twb <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser")
if (nzchar(twb) && file.exists(twb)) {
  xml <- xml2::read_xml(twb)
  extract_named_connections(xml)
}
twbx <- system.file("extdata", "test_for_zip.twbx", package = "twbparser")
if (nzchar(twbx) && file.exists(twbx)) {
  members <- twbx_list(twbx)
  twb_rows <- members$name[grepl("\\.twb$", members$name)]
  if (length(twb_rows) > 0L && !is.na(twb_rows[1])) {
    twb_member <- twb_rows[1]
    xml <- xml2::read_xml(utils::unzip(twbx, twb_member, exdir = tempdir()))
    extract_named_connections(xml)
  }
}
Returns parameter columns (those with param-domain-type) and basic metadata,
including a best-effort current value if present.
extract_parameters(xml_doc)
xml_doc |
An xml2 document for a Tableau .twb. |
A tibble with columns:
Datasource name.
User-visible caption or cleaned internal name.
Internal Tableau name.
Tableau datatype.
Tableau role.
Tableau parameter domain type.
Underlying data-type (if present).
Current value if specified.
Always TRUE.
Raw table reference.
Cleaned table name.
twb <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser")
stopifnot(nzchar(twb), file.exists(twb))
xml <- xml2::read_xml(twb)
params <- extract_parameters(xml)
head(params)
Returns raw columns excluding calculated fields and parameters.
extract_raw_fields(xml_doc)
xml_doc |
An xml2 document for a Tableau .twb. |
A tibble with columns:
Datasource name.
User-visible caption or cleaned internal name.
Internal Tableau name.
Tableau datatype.
Tableau role.
Whether the field is hidden.
Always FALSE.
Raw table reference.
Cleaned table name.
twb <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser")
stopifnot(nzchar(twb), file.exists(twb))
xml <- xml2::read_xml(twb)
raw_fields <- extract_raw_fields(xml)
head(raw_fields)
<relation> tags from a TWB
Returns a tibble of <relation> elements found in a Tableau TWB XML,
with key attributes and any custom SQL text.
extract_relations(xml_doc)
xml_doc |
An xml2 document for a Tableau .twb. |
A tibble with columns:
name |
Relation name |
table |
Table reference |
connection |
Connection ID |
type |
Relation type (table, join, etc.) |
join |
Join type if applicable |
custom_sql |
Inline SQL text if present |
twb <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser")
stopifnot(nzchar(twb), file.exists(twb))
xml <- xml2::read_xml(twb)
relations <- extract_relations(xml)
head(relations)
Parses Tableau "relationships" (introduced in 2020.2) between logical tables, including the join predicate fields and operator.
extract_relationships(xml_doc)
xml_doc |
An xml2 document for a Tableau .twb. |
A tibble with columns:
relationship_type |
Always "Relationship" |
left_table |
Left table name |
right_table |
Right table name |
left_field |
Field name on left side |
operator |
Join operator (e.g., "=") |
right_field |
Field name on right side |
left_is_calc |
Logical, whether left field is a calculation |
right_is_calc |
Logical, whether right field is a calculation |
twb <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser")
stopifnot(nzchar(twb), file.exists(twb))
xml <- xml2::read_xml(twb)
extract_relationships(xml)
Extract the .twb (and optionally all files) from a .twbx
extract_twb_from_twbx(
  twbx_path,
  extract_dir = file.path(tempdir(), paste0("twbx_", tools::file_path_sans_ext(basename(twbx_path)), "_", format(Sys.time(), "%Y%m%d%H%M%S"))),
  extract_all = FALSE
)
twbx_path |
Path to a .twbx file. |
extract_dir |
Directory to extract into (defaults to a timestamped temp dir). |
extract_all |
If TRUE, extract all files from the archive; if FALSE (default), extract only the .twb. |
List with twb_path, exdir, twbx_path, and manifest (tibble).
twbx <- system.file("extdata", "test_for_zip.twbx", package = "twbparser")
res <- extract_twb_from_twbx(twbx, extract_all = FALSE)
basename(res$twb_path)
Generates candidate join pairs by:
Matching semantic_role across different tables.
Matching field names (case-insensitive) across different tables.
infer_implicit_relationships(fields_df, max_pairs = 50000L)
fields_df |
A data frame like the output of extract_columns_with_table_source(). |
max_pairs |
Maximum number of candidate pairs to return (default 50,000). |
A tibble with columns:
Left table name.
Left field name.
Right table name.
Right field name.
Why the pair was suggested.
twb <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser")
stopifnot(nzchar(twb), file.exists(twb))
xml <- xml2::read_xml(twb)
fields <- extract_columns_with_table_source(xml)
inferred <- infer_implicit_relationships(fields)
head(inferred)
Draws a quick base-graphics plot of a dependency graph. Vertices that are
calculated fields (present in fields_df$name) are drawn differently.
plot_dependency_graph(g, fields_df = NULL, seed = NULL)
g |
An igraph graph, e.g. the result of build_dependency_graph(). |
fields_df |
Optional data frame with a name column identifying the calculated fields. |
seed |
Optional integer seed to make the layout reproducible. If NULL (default), the layout is nondeterministic. |
Invisibly returns g.
fields <- tibble::tibble(
  name = c("X_plus_Y", "Z"),
  formula = c("[X] + [Y]", "[X_plus_Y] * 2")
)
g <- build_dependency_graph(fields)
plot_dependency_graph(g, fields)            # nondeterministic layout
plot_dependency_graph(g, fields, seed = 1)  # deterministic layout
Uses relationships_df with columns left_table, right_table,
left_field, right_field, and optional operator.
plot_relationship_graph(relationships_df, seed = NULL)
relationships_df |
Data frame of field-level relationships. |
seed |
Optional integer seed to make the layout reproducible. If NULL (default), the layout is nondeterministic. |
Invisibly returns the plotted graph.
Visualizes joins between sources. Expects joins_df with columns
left_table, right_table, left_field, right_field. If
relationships_df is provided (modern relationships), it will render a
second graph highlighting those relationships.
plot_source_join_graph(joins_df, relationships_df = NULL, seed = NULL)
joins_df |
Data frame with join edges. |
relationships_df |
Optional data frame with modern relationships. |
seed |
Optional integer seed to make layouts reproducible. If NULL (default), the layouts are nondeterministic. |
Invisibly returns the join graph, or a list list(joins = g, relationships = gr)
if relationships_df is provided.
Add a prettified formula column to calculated fields table
prettify_calculated_fields(calcs, strip_brackets = FALSE, wrap = 100L)
calcs |
tibble from extract_calculated_fields() |
strip_brackets |
logical |
wrap |
integer wrap width; default 100 |
tibble with extra column formula_pretty
Prettify a Tableau calculation formula for display
tableau_formula_pretty(formula, strip_brackets = FALSE, wrap = NA_integer_)
formula |
character scalar |
strip_brackets |
logical; remove [ ] around field names (default FALSE) |
wrap |
optional integer to hard-wrap lines (use NA to disable) |
character scalar (multi-line, indented)
Queries the Metadata (GraphQL) API for Custom SQL tables in the content graph.
tbs_custom_sql_graphql(
  content_id,
  base_url = Sys.getenv("TABLEAU_BASE_URL"),
  site = Sys.getenv("TABLEAU_SITE"),
  token = Sys.getenv("TABLEAU_PAT")
)
content_id |
Character. Workbook or datasource ID (GUID). |
base_url |
Character. Server/Cloud base URL (e.g., "https://..."). |
site |
Character. Site contentUrl ("" for default site). |
token |
Character. REST credentials token. |
A tibble with columns such as custom_sql_name, custom_sql_query,
database, schema. Zero rows if none.
tbs_custom_sql_graphql("abc-123")
Returns an empty tibble when credentials are missing or the item is not found.
tbs_publish_info(
  content_id,
  base_url = Sys.getenv("TABLEAU_BASE_URL"),
  site = Sys.getenv("TABLEAU_SITE"),
  token = Sys.getenv("TABLEAU_PAT")
)
content_id |
Character. Workbook or datasource ID (GUID). |
base_url |
Character. Server/Cloud base URL (e.g., "https://..."). |
site |
Character. Site contentUrl ("" for the default site). |
token |
Character. REST credentials token (from a prior sign-in). |
A tibble with columns like content_id, site, project, web_url,
created_at, updated_at. May be zero rows if unavailable.
tbs_publish_info("abc-123")
Returns every calculated field in the workbook enriched with a computation
category (calc_type), LOD sub-type, dependency count, and dependency depth
— the maximum number of calc-on-calc hops in the field's dependency chain.
twb_calc_complexity(x, include_parameters = FALSE)
x |
A TwbParser object or xml2 document. |
include_parameters |
Logical; if TRUE, include parameter fields. Default FALSE. |
A tibble with columns:
Datasource the field belongs to.
Human-readable field name.
Bracketed internal Tableau name.
Field data type.
"measure" or "dimension".
One of "lod", "table_calc", "aggregate", "raw".
Tested in that precedence order.
"fixed", "include", or "exclude"; NA if not LOD.
Logical; existing heuristic flag preserved for backward compatibility.
Integer; longest chain of calc-on-calc dependencies.
0 means the field only references raw fields (or has no references).
Integer; count of distinct bracketed tokens in the formula.
Raw formula string.
twb <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser")
stopifnot(nzchar(twb), file.exists(twb))
xml <- xml2::read_xml(twb)
twb_calc_complexity(xml)
Chart (mark) types per worksheet.
twb_charts(x)
x |
TwbParser or xml2 document. |
Tibble with columns: worksheet, mark_types (comma-separated).
Colors and palettes referenced in the workbook.
twb_colors(x)
x |
TwbParser or xml2 document. |
Tibble with columns describing palette names and explicit colors.
Finds every <relation formula="..."> node that looks like a SQL statement
and returns its name, type, raw SQL text, and a flag for whether it starts
with SELECT or WITH.
twb_custom_sql(x)
x |
A TwbParser object or xml2 document. |
A tibble with columns:
Name attribute of the relation node.
Type attribute (e.g. "text", "table").
Full SQL text.
TRUE when the text begins with SELECT or WITH.
twb <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser")
stopifnot(nzchar(twb), file.exists(twb))
xml <- xml2::read_xml(twb)
twb_custom_sql(xml)
Parses <action> nodes from dashboard <actions> sections and the
top-level workbook <actions> section. Returns one row per action.
twb_dashboard_actions(x, dashboard = NULL)
x |
A TwbParser object or xml2 document. |
dashboard |
Optional character scalar. When supplied, only actions
associated with that dashboard are returned. |
A tibble with columns:
Caption / display name of the action.
"filter", "highlight", "url", "set", or
another type string from the XML.
Comma-separated list of source worksheet names.
Target worksheet name, or NA for URL actions.
Trigger: "select", "menu", or "hover".
URL value for URL-type actions; NA otherwise.
xml <- xml2::read_xml(
  '<workbook>
     <actions>
       <action caption="Filter 1" name="act1" type="filter">
         <source-sheets> <source-sheet name="Sheet1"/> </source-sheets>
         <target-sheets> <target-sheet name="Sheet2"/> </target-sheets>
         <run-on type="select"/>
       </action>
     </actions>
   </workbook>'
)
twb_dashboard_actions(xml)
Filters found on dashboards and their positions.
twb_dashboard_filters(x, dashboard = NULL)
x |
TwbParser or xml2 document. |
dashboard |
Optional dashboard name to filter to. |
Tibble with columns: dashboard, zone_id, zone_type, field, presentation, x, y, w, h.
Returns one row per zone per dashboard, including the parent-zone relationship and a tiled/floating classification.
twb_dashboard_layout(x, dashboard = NULL)
x |
A TwbParser object or xml2 document. |
dashboard |
Optional character scalar to restrict output to one dashboard. |
A tibble with columns:
Dashboard name.
Zone identifier.
Parent zone identifier (NA for root zones).
"worksheet", "filter", "legend",
"parameter_control", "text", "image", "container", or "blank".
Referenced worksheet name or object, if applicable.
"floating" or "tiled".
Horizontal offset, or NA.
Vertical offset, or NA.
Width, or NA.
Height, or NA.
xml <- xml2::read_xml(
  '<workbook>
     <dashboards>
       <dashboard name="Overview">
         <zones>
           <zone id="1" type="layoutV">
             <zone id="2" worksheet="Sheet1" x="0" y="0" w="600" h="400"/>
             <zone id="3" type="filter" x="0" y="400" w="600" h="50"/>
           </zone>
         </zones>
       </dashboard>
     </dashboards>
   </workbook>'
)
twb_dashboard_layout(xml)
Returns one row per worksheet per dashboard, with the zone's position on the canvas.
twb_dashboard_sheets(x, dashboard = NULL)
x |
A TwbParser object or xml2 document. |
dashboard |
Optional character scalar to restrict output to one dashboard. |
A tibble with columns:
Dashboard name.
Referenced worksheet name.
Zone identifier.
Horizontal offset (pixels), or NA.
Vertical offset (pixels), or NA.
Width (pixels), or NA.
Height (pixels), or NA.
xml <- xml2::read_xml(
  '<workbook>
     <dashboards>
       <dashboard name="Overview">
         <zones>
           <zone id="1" worksheet="Sheet1" x="0" y="0" w="600" h="400"/>
         </zones>
       </dashboard>
     </dashboards>
   </workbook>'
)
twb_dashboard_sheets(xml)
Per-dashboard summary (filters count and chart types).
twb_dashboard_summary(x)
x |
TwbParser or xml2 document. |
Tibble with columns: dashboard, worksheet_count, zone_count, filters, chart_types.
Dashboards overview (count of zones and referenced worksheets).
twb_dashboards(x)
x |
TwbParser or xml2 document. |
Tibble with columns: dashboard, worksheet_count, zone_count.
Combines shelf placement and filter usage into a tidy long tibble showing where each field appears and in what capacity across all (or selected) worksheets.
twb_field_usage(
  x,
  include_filters = TRUE,
  include_shelves = TRUE,
  wide = FALSE
)
x |
A TwbParser object or xml2 document. |
include_filters |
Logical; include filter appearances. Default TRUE. |
include_shelves |
Logical; include shelf appearances (rows, cols, color,
size, etc.). Default TRUE. |
wide |
Logical; if TRUE, return the wide form (one row per field, one column per sheet). Default FALSE. |
Long form (wide = FALSE): a tibble with columns:
Human-readable field name.
Datasource the field belongs to.
Worksheet name.
Usage context, e.g. "shelf:rows", "shelf:color",
"filter".
Number of times the field appears in this context on this sheet (handles multi-pill rows/cols).
Wide form (wide = TRUE): one row per (field_clean, datasource),
one column per sheet, cell value is a comma-separated context string or
NA.
twb <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser")
stopifnot(nzchar(twb), file.exists(twb))
xml <- xml2::read_xml(twb)
twb_field_usage(xml)
twb_field_usage(xml, wide = TRUE)
Returns any <initial-sql> nodes found inside connection or
named-connection elements.
twb_initial_sql(x)
x |
A TwbParser object or xml2 document. |
A tibble with columns:
Name or caption of the parent connection element.
SQL text of the initial statement.
twb <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser")
stopifnot(nzchar(twb), file.exists(twb))
xml <- xml2::read_xml(twb)
twb_initial_sql(xml)
For a dashboard: one row per zone with component type, target (worksheet or field), filter presentation (if applicable), and x/y/w/h when present. For a worksheet: mark types, filters, legends, parameter controls. For a story: one row per story point with its referenced target.
twb_page_composition(x, name)
x |
TwbParser or xml2 document. |
name |
Page name (character scalar). |
Tibble with columns: page_type, page_name, component_type, zone_id, target, field, presentation, x, y, w, h.
List all pages (dashboards, worksheets, stories).
twb_pages(x)
x |
TwbParser or xml2 document. |
Tibble with columns: page_type, name.
Summary of all pages (counts and quick descriptors).
twb_pages_summary(x)
x |
TwbParser or xml2 document. |
Tibble with columns including page_type, name, and count columns such as n_zones, n_worksheets, n_filters, n_legends, n_parameter_controls, n_story_points, and mark_types for worksheets.
Inspects datasource nodes and heuristically flags those that reference a published (server-side) source rather than an embedded one.
twb_published_refs(x)
x |
A TwbParser object or xml2 document. |
A tibble with columns:
Internal datasource name.
User-visible caption.
Value of the hasconnection attribute.
TRUE when hasconnection = false or when the
node text contains published-source markers.
Short explanation of the classification.
twb <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser")
stopifnot(nzchar(twb), file.exists(twb))
xml <- xml2::read_xml(twb)
twb_published_refs(xml)
Assembles all extracted intelligence — datasources, parameters, calculated fields with complexity classifications, field usage, filters, sorts, chart types, dashboard layout, and actions — into a single named list (or formatted text) ready for use when porting to another visualisation tool.
twb_replication_brief(
  x,
  dashboard = NULL,
  include_sql = TRUE,
  include_formulas = TRUE,
  format = c("list", "text")
)
x |
A TwbParser object or xml2 document. |
dashboard |
Optional character scalar. When supplied, sheet-level sections (filters, sorts, chart types, field usage, layout) are scoped to the sheets that belong to this dashboard. |
include_sql |
Logical; include custom SQL blocks in the brief. Default TRUE. |
include_formulas |
Logical; when TRUE, add a formula_pretty column to the calculated fields. Default TRUE. |
format |
Either "list" (default) or "text". |
format = "list": a named list with elements:
1-row tibble: file name, counts, generation timestamp.
Datasource connection details.
Parameter fields with current values.
Custom SQL blocks, or NULL if include_sql = FALSE.
Tibble from twb_calc_complexity(), optionally
with a formula_pretty column.
Tibble from twb_field_usage().
Worksheet filters (scoped to dashboard if given).
Worksheet sorts (scoped to dashboard if given).
Mark types per worksheet.
Zone positions from twb_dashboard_sheets().
Dashboard actions from twb_dashboard_actions().
format = "text": a single character(1) with section headers and
tabular output.
twb <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser")
stopifnot(nzchar(twb), file.exists(twb))
xml <- xml2::read_xml(twb)
brief <- twb_replication_brief(xml)
names(brief)
brief$meta
Reads per-axis style rules embedded in worksheet XML and returns one row per axis per worksheet.
twb_sheet_axes(x, sheet = NULL)
x |
A TwbParser object or xml2 document. |
sheet |
Optional character scalar to restrict output to one worksheet. |
A tibble with columns:
Worksheet name.
Axis identifier (e.g., "rows", "cols", or "automatic").
Column reference if axis is field-specific; NA otherwise.
Human-readable field name; NA if not field-specific.
Scale type ("linear", "log", …) if present; NA otherwise.
Logical: TRUE if axis is reversed.
Logical: TRUE if zero is pinned on axis.
xml <- xml2::read_xml(
  '<workbook>
     <worksheets>
       <worksheet name="Sheet1">
         <table>
           <style>
             <style-rule element="axis">
               <format attr="reverse" value="false"/>
               <format attr="scale-include-zero" value="true"/>
             </style-rule>
           </style>
         </table>
       </worksheet>
     </worksheets>
   </workbook>'
)
twb_sheet_axes(xml)
Returns one row per filter per worksheet, with full details on filter class, inclusion mode, categorical members, and numeric or date range bounds.
twb_sheet_filters(x, sheet = NULL)
x |
A TwbParser object or xml2 document. |
sheet |
Optional character scalar to restrict output to one worksheet. |
A tibble with columns:
Worksheet name.
Raw column-reference attribute value.
Human-readable field name.
Datasource name.
Tableau filter class: "categorical", "range",
"relative-date", "date", "set", "top", etc.
"include" or "exclude".
Comma-separated member values for categorical filters; NA otherwise.
Lower bound for range/quantitative filters; NA otherwise.
Upper bound; NA otherwise.
xml <- xml2::read_xml(
  '<workbook>
     <worksheets>
       <worksheet name="Sheet1">
         <table>
           <view>
             <filter class="categorical" column="[ds].[Category]">
               <groupfilter function="union">
                 <groupfilter function="member" member="[Furniture]"/>
                 <groupfilter function="member" member="[Technology]"/>
               </groupfilter>
             </filter>
           </view>
         </table>
       </worksheet>
     </worksheets>
   </workbook>'
)
twb_sheet_filters(xml)
Returns a tidy tibble describing which fields are placed on each visual shelf (rows, cols, color, size, label, detail, tooltip, etc.) for every worksheet in the workbook (or a single named sheet).
twb_sheet_shelves(x, sheet = NULL)
x |
A |
sheet |
Optional character scalar. When supplied only that worksheet is returned; otherwise all worksheets are returned. |
A tibble with columns:
Worksheet name.
Shelf name: "rows", "cols", or an encoding type such as
"color", "size", "label", "detail", "tooltip", "shape",
"text", "path", "angle", "lod", "geometry", etc.
Raw column-reference attribute value.
Field instance name (after stripping datasource prefix).
Human-readable field name.
Datasource name referenced.
Aggregation function ("SUM", "AVG", …) or NA.
xml <- xml2::read_xml(
  '<workbook> <worksheets> <worksheet name="Sales"> <table> <rows>[ds].[Category]</rows> <cols>[ds].[Sales]</cols> <panes> <pane> <mark class="Bar"/> <encodings> <color column="[ds].[Category]"/> </encodings> </pane> </panes> </table> </worksheet> </worksheets> </workbook>'
)
twb_sheet_shelves(xml)
Returns one row per sort directive per worksheet.
twb_sheet_sorts(x, sheet = NULL)
x |
A |
sheet |
Optional character scalar to restrict output to one worksheet. |
A tibble with columns:
Worksheet name.
Raw column-reference attribute.
Human-readable field name.
Datasource name.
"ascending" or "descending".
Sort method: "field", "alphabetic", "manual", "data-source-order", etc.
xml <- xml2::read_xml(
  '<workbook> <worksheets> <worksheet name="Sheet1"> <table> <view> <sort class="sum" column="[ds].[Sales]" direction="descending"/> </view> </table> </worksheet> </worksheets> </workbook>'
)
twb_sheet_sorts(xml)
Initialize the parser from a .twb or .twbx path.
Return the TWBX manifest (if available).
Return TWBX extract entries.
Return TWBX image entries.
Extract files from the TWBX to disk.
Fields placed on visual shelves for one or all worksheets.
Detailed filter configuration for one or all worksheets.
Axis configuration for one or all worksheets.
Sort directives for one or all worksheets.
Worksheets embedded in one or all dashboards.
Full zone layout with container hierarchy.
Dashboard and workbook actions.
Calculated field complexity classifications.
Field usage matrix across worksheets.
Full replication brief for the workbook or a single dashboard.
Validate relationships; optionally stop on failure.
Print a concise summary of parsed content.
path |
Path to a .twb or .twbx file. |
types |
Optional vector of types (e.g., |
pattern |
Optional regex to match archive paths. |
files |
Optional explicit archive paths to extract. |
exdir |
Output directory (defaults to parser's twbx dir or tempdir()). |
sheet |
Optional worksheet name. |
include_parameters |
Logical; include parameter fields. Default |
include_filters |
Include filter appearances. Default |
include_shelves |
Include shelf appearances. Default |
wide |
Return wide format (one col per sheet). Default |
dashboard |
Optional dashboard name to scope the brief. |
include_sql |
Include custom SQL blocks. Default |
include_formulas |
Add |
format |
|
error |
If TRUE, stop execution when validation fails. |
An R6 class generator.
Create a parser for Tableau .twb / .twbx files. On initialization, the
parser reads the XML and precomputes relationships, joins, fields, calculated
fields, inferred relationships, and datasource details. For .twbx, it also
extracts the largest .twb and records a manifest.
Path to the .twb or .twbx file on disk.
Parsed xml2 document of the workbook.
Original .twbx path if the workbook was packaged.
Directory where the .twbx was extracted.
Tibble of .twbx contents from twbx_list().
Tibble of <relation> nodes from extract_relations().
Tibble of join clauses from extract_joins().
Tibble of modern relationships from extract_relationships().
Tibble of inferred relationship pairs by name and role.
List containing data_sources, parameters, and all_sources.
Tibble of raw fields with table information.
Tibble of calculated fields.
Result from validate() as list with ok and issues elements.
Create a parser from .twb or .twbx file.
Return .twbx manifest tibble.
Return .twbx extract entries.
Return .twbx image entries.
Extract files from .twbx archive.
Return relations tibble.
Return joins tibble.
Return modern relationships tibble.
Return inferred relationship pairs.
Return datasource details tibble.
Return parameters tibble.
Return all sources tibble.
Return raw fields tibble.
Return calculated fields tibble. When pretty = TRUE, includes a
formula_pretty column with line breaks and indentation.
Validate relationships. Stops execution if error = TRUE.
Print a brief summary to console.
Extract specific files from a .twbx
twbx_extract_files(twbx_path, files = NULL, pattern = NULL, types = NULL, exdir = NULL)
twbx_path |
Path to a .twbx file. |
files |
Vector of archive paths to extract (optional). |
pattern |
Perl regex to match archive paths (optional). |
types |
Subset by type, as reported in the type column of twbx_list() (e.g., "workbook"). |
exdir |
Output directory (defaults to temp). |
Tibble with name, type, and out_path of extracted files.
twbx <- system.file("extdata", "test_for_zip.twbx", package = "twbparser")
files <- twbx_extract_files(twbx, types = c("workbook"))
head(files)
List contents of a Tableau .twbx
twbx_list(twbx_path)
twbx_path |
Path to a .twbx file. |
Tibble with columns: name, size_bytes, modified, type.
twbx <- system.file("extdata", "test_for_zip.twbx", package = "twbparser")
twbx_list(twbx)
Checks that relationship endpoints reference known datasource tables and that
the predicate fields appear somewhere in the workbook (calculated, raw, or
parameter fields), using a lenient token match (e.g., INT([GEOID]) = GEOID).
validate_relationships(parser, strict = FALSE)
parser |
A TwbParser object. |
strict |
Logical. Reserved for future table-scoped checks that can be overly conservative with federated sources. Currently not used. |
A list with:
TRUE if no issues; FALSE otherwise.
A named list of tibbles. Possible elements:
unknown_tables: endpoints not found among known tables.
unknown_fields: predicate fields not found in the field pool.
twb <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser")
if (nzchar(twb) && file.exists(twb)) {
  parser <- TwbParser$new(twb)
  res <- validate_relationships(parser)
  if (!res$ok) print(res$issues)
}