diff --git a/Cargo.lock b/Cargo.lock index 2bc8ec6c327639d9647f7c06b2a970b7356ac546..ec56aac3bd001e426bb56eac065903049031610e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -107,6 +107,7 @@ dependencies = [ "pretty_assertions", "regex", "reqwest", + "sanitize-filename", "scraper", "serde", "serde_json", @@ -114,7 +115,9 @@ dependencies = [ "termination", "thiserror", "tokio", + "unicode-width", "url", + "urlencoding", ] [[package]] @@ -1545,6 +1548,16 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "sanitize-filename" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ed72fbaf78e6f2d41744923916966c4fbe3d7c74e3037a8ee482f1115572603" +dependencies = [ + "lazy_static", + "regex", +] + [[package]] name = "schannel" version = "0.1.23" @@ -2060,6 +2073,12 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "urlencoding" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" + [[package]] name = "utf-8" version = "0.7.6" diff --git a/Cargo.toml b/Cargo.toml index 1e428042c3612528f7ed6d68cd7adb069b9386bd..a56e976543d8a750c2794d81f4e497a4a332402e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,6 +25,7 @@ itertools = "0.11.0" num_cpus = "1.16.0" regex = "1.10.2" reqwest = "0.11.22" +sanitize-filename = "0.5.0" scraper = "0.18.1" serde = { version = "1.0.190", features = ["derive"] } serde_json = "1.0.108" @@ -32,7 +33,9 @@ serde_yaml = "0.9.27" termination = "0.1.2" thiserror = "1.0.50" tokio = { version = "1.33.0", features = ["full"] } +unicode-width = "0.1.11" url = "2.4.1" +urlencoding = "2.1.3" [dev-dependencies] assert_cmd = "2.0.12" diff --git a/README.md b/README.md index 04090076479cb604a9f48cb892dc5598dfc01bad..a404e625767a080de62b158b60c0c4c7b59851e9 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,8 @@ # archwiki-rs 📖 A CLI tool to read pages from the ArchWiki -## Table of contents + + - [Installation](#installation) * [crates.io](#cratesio) * [Source](#source) @@ -10,18 +11,22 @@ A CLI tool to read pages from the ArchWiki + [Basic request](#basic-request) + [Using a different format](#using-a-different-format) + [Caching](#caching) - + [404 page not found (-̥̥̥n-̥̥̥ )](#404-page-not-found--̥̥̥n-̥̥̥-) + + [404 page not found (-̥̥̥n-̥̥̥ )](#404-page-not-found--%CC%A5%CC%A5%CC%A5n-%CC%A5%CC%A5%CC%A5-) * [Searching the ArchWiki](#searching-the-archwiki) + [Search by title](#search-by-title) + [Search for text](#search-for-text) * [Downloading wiki info](#downloading-wiki-info) - + [Possible speed-ups](#possible-speed-ups) * [Listing ArchWiki information](#listing-archwiki-information) + [Listing pages](#listing-pages) + [Listing categories](#listing-categories) + [Listing languages](#listing-languages) + * [Downloading a local copy of the ArchWiki](#downloading-a-local-copy-of-the-archwiki) + + [Possible speed-ups](#possible-speed-ups) * [Other Information](#other-information) - [Plugins](#plugins) +- [Alternatives](#alternatives) + + ## Installation Currently, you can only install this tool from [ crates.io ](https://crates.io/crates/archwiki-rs) @@ -83,7 +88,7 @@ uses stderr to give the user suggestions on what they might have wanted to type. An example shell script to do something like this is available in the [repository](https://github.com/jackboxx/archwiki-rs) -under the name `example.sh`. +under the name `example.sh` which can be used like this `sh example.sh `. 
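The gist of such a wrapper is to run `read-page` and, when it fails, fall back to the similar page names that `archwiki-rs` prints to stderr. A minimal sketch of that idea (not the actual `example.sh` shipped in the repository, which may differ):

```sh
#!/bin/sh
# Usage: sh example.sh <page>
# Read a page; if it is not found, show the suggestions archwiki-rs prints to stderr.
page="$1"
suggestions="$(mktemp)"

if content="$(archwiki-rs read-page "$page" 2>"$suggestions")"; then
    printf '%s\n' "$content"
else
    echo "page '$page' not found, did you mean:" >&2
    cat "$suggestions" >&2
fi

rm -f "$suggestions"
```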
### Searching the ArchWiki @@ -106,25 +111,13 @@ that the search term is in ### Downloading wiki info -Page names are stored locally to prevent having to scrape the entire table of contents of -the ArchWiki with every command. - -Use this command to fetch all page names. -Be warned, since this scrapes multiple thousand links, this can be quite slow (-, - )…zzzZZ +Page and category names are stored locally for faster look-ups. +Use this command to fetch all page and category names. ```sh archwiki-rs sync-wiki ``` -#### Possible speed-ups - -If you don't mind your CPU and network becoming a bit saturated you can increase the -amount of threads used to fetch data from the wiki - -```sh -archwiki-rs sync-wiki -t 8 -``` - ### Listing ArchWiki information #### Listing pages @@ -163,6 +156,27 @@ And the same for available languages archwiki-rs list-languages ``` +### Downloading a local copy of the ArchWiki + +Use this command to download a local copy of the ArchWiki. Be warned, this command makes over +10,000 requests to the ArchWiki so it takes a while to finish (-, -)…zzzZZ + +```sh +archwiki-rs local-wiki ~/local-archwiki --format markdown +``` + +#### Possible speed-ups + +If you don't mind your CPU and network becoming a bit saturated you can increase the +amount of threads used to fetch data from the wiki. + +Keep in mind that you might get rate limited by the ArchWiki if make too many requests at once. + +```sh +archwiki-rs local-wiki -t 8 +``` + + ### Other Information Other information such as the value/location of the `cache directory` can be obtained @@ -185,3 +199,8 @@ Here's a list of programs that have plugins for `archwiki-rs` to make your life - [Neovim](https://github.com/Jackboxx/archwiki-nvim) - [Obsidian](https://github.com/Jackboxx/archwiki-obsidian) + +## Alternatives + +If you are using Arch Linux a great alternative for this tool is the `wikiman` CLI tool +in combination with the `arch-wiki-docs` package. diff --git a/src/categories.rs b/src/categories.rs index a26a7020998901823b50f489a09e865c3f792ae4..f14a0e52a55a204a220e460c9de96ebd9b36d031 100644 --- a/src/categories.rs +++ b/src/categories.rs @@ -1,21 +1,9 @@ -use ::futures::future; -use indicatif::{MultiProgress, ProgressBar}; -use itertools::Itertools; -use scraper::{Html, Node, Selector}; -use std::{collections::HashMap, thread, time::Duration}; -use url::Url; +#![allow(unused)] -#[derive(Debug, Clone)] -struct CategoryListItem { - name: String, - url: String, -} +use itertools::Itertools; +use std::collections::HashMap; -use crate::{ - error::WikiError, - utils::{extract_tag_attr, get_elements_by_tag, HtmlTag}, - wiki_api::fetch_page_by_url, -}; +use crate::error::WikiError; /// Returns a print ready list of the provided page names in /// 1. A tree format if `flatten` is `false`: @@ -39,13 +27,32 @@ use crate::{ /// If it is not flattened the list is first ordered by category names and then by page names withing those /// categories. /// If it is flattened then it will by sorted by page names. 
-pub fn list_pages(categories: &HashMap>, flatten: bool) -> String { +pub fn list_pages( + wiki_tree: &HashMap>, + categories_filter: Option<&[String]>, + flatten: bool, +) -> String { if flatten { - return categories.values().flatten().unique().sorted().join("\n"); + return wiki_tree + .iter() + .filter_map(|(cat, pages)| { + categories_filter + .map(|filter| filter.iter().contains(cat).then_some(pages)) + .unwrap_or(Some(pages)) + }) + .flatten() + .unique() + .sorted() + .join("\n"); } - categories + wiki_tree .iter() + .filter_map(|(cat, pages)| { + categories_filter + .map(|filter| filter.iter().contains(cat).then_some((cat, pages))) + .unwrap_or(Some((cat, pages))) + }) .sorted() .map(|(cat, pages)| { let list = pages.iter().map(|p| format!("───┤{p}")).join("\n"); @@ -54,128 +61,3 @@ pub fn list_pages(categories: &HashMap>, flatten: bool) -> S }) .join("\n\n") } - -/// Scrapes the ArchWiki for all page names and their immediate parent category. Category nesting -/// is ignored as a category can be a sub category of multiple other categories. -/// -/// Caution this function will most likely take several minutes to finish (-, – )…zzzZZ -pub async fn fetch_all_pages( - hide_progress: bool, - thread_count: usize, - max_categories: Option, - start_at: Option<&str>, -) -> Result>, WikiError> { - let from = start_at.unwrap_or(""); - let limit = max_categories.unwrap_or(10000); - - let base_url = "https://wiki.archlinux.org/index.php?title=Special:Categories"; - - let url = Url::parse_with_params( - base_url, - &[("from", from), ("limit", limit.to_string().as_str())], - )?; - - let document = fetch_page_by_url(url).await?; - - let body_class = ".mw-spcontent"; - let selector = Selector::parse(body_class) - .unwrap_or_else(|_| panic!("{body_class} should be valid selector")); - - let body = document.select(&selector).next().unwrap(); - - let category_list_element = get_elements_by_tag(*body, &HtmlTag::Ul) - .into_iter() - .next() - .unwrap(); - - let items = parse_category_list(category_list_element); - let multi_bar = MultiProgress::new(); - - let chunk_count = items.len() / thread_count; - let tasks = items - .chunks(chunk_count) - .map(|chunk| { - let chunk = chunk.to_vec(); - let bar = ProgressBar::new(chunk.len().try_into().unwrap_or(0)); - let bar = multi_bar.add(bar); - if hide_progress { - bar.finish_and_clear(); - } - - tokio::spawn(async move { - let mut res = Vec::with_capacity(chunk.len()); - for item in chunk { - let pages = match fetch_page_names_from_categoriy(&item.url).await { - Ok(pages) => pages, - - Err(_) => { - thread::sleep(Duration::from_secs(1)); - fetch_page_names_from_categoriy(&item.url) - .await - .unwrap_or_else(|err| { - eprintln!( - "failed to fetch pages in category {}\n ERROR {err}", - item.name - ); - vec![] - }) - } - }; - - res.push((item.name, pages)); - bar.inc(1); - } - - res - }) - }) - .collect_vec(); - - let out = future::join_all(tasks) - .await - .into_iter() - .flatten() - .flatten() - .collect_vec(); - - Ok(HashMap::from_iter(out)) -} - -fn parse_category_list(list_node: ego_tree::NodeRef<'_, scraper::Node>) -> Vec { - let list_items = get_elements_by_tag(list_node, &HtmlTag::Li); - list_items - .into_iter() - .flat_map(|li| { - let a_tag = li.first_child()?; - let a_tag_element = a_tag.value().as_element()?; - - let name = a_tag.first_child()?.value().as_text()?.to_string(); - let url = extract_tag_attr(a_tag_element, &HtmlTag::A, "href")?; - - Some(CategoryListItem { name, url }) - }) - .collect() -} - -/// Scrape the ArchWiki for a list of all 
page names that belong to a specific category -async fn fetch_page_names_from_categoriy(url_str: &str) -> Result, WikiError> { - let selector = Selector::parse("#mw-pages").expect("#mw-pages to be a valid css selector"); - - let body = reqwest::get(url_str).await?.text().await?; - let document = Html::parse_document(&body); - - let Some(page_container) = document.select(&selector).next() else { - return Ok(vec![]) - }; - - Ok(page_container - .descendants() - .filter_map(|node| { - if let Node::Element(e) = node.value() { - extract_tag_attr(e, &HtmlTag::A, "title") - } else { - None - } - }) - .collect()) -} diff --git a/src/cli.rs b/src/cli.rs index b2f6a58da18d5078fb39bf0f3b0a6509fc25b769..6be1d1d8fcd7e8835ad890a3dc4a4398cbbc7f8f 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -15,7 +15,7 @@ pub struct CliArgs { pub enum Commands { #[command( about = "Read a page from the ArchWiki", - long_about = "Read a page from the ArchWiki, if the page is not found similar page names are recommended. A list of page names is in the pages.yml file which can be updated with the 'sync-wiki' command." + long_about = "Read a page from the ArchWiki, if the page is not found similar page names are recommended" )] ReadPage { #[arg(short, long)] @@ -26,7 +26,7 @@ pub enum Commands { ignore_cache: bool, #[arg(short, long)] /// Don't invalidate the cache even if it is considered stale. A cache is considered stale - /// after it hasn't been updated in more then 14 days. + /// after it hasn't been updated in more then 14 days disable_cache_invalidation: bool, #[arg(short, long)] /// Show URLs for plain-text output @@ -42,7 +42,7 @@ pub enum Commands { }, #[command( about = "Search the ArchWiki for pages", - long_about = "Search the ArchWiki for pages by title. Uses the 'opensearch' API action to perform queries." + long_about = "Search the ArchWiki for pages" )] Search { search: String, @@ -53,74 +53,90 @@ pub enum Commands { /// Maximum number of results limit: u16, #[arg(short, long)] - /// Search for pages by text content instead of title. Uses the 'query' API action instead - /// of 'opensearch'. + /// Search for pages by text content instead of title text_search: bool, }, #[command( about = "List all pages from the ArchWiki that have been downloaded", - long_about = "List all pages from the ArchWiki that have been downloaded. See 'sync-wiki' for information on downloading." + long_about = "List all pages from the ArchWiki that have been downloaded. See 'sync-wiki' for information on downloading" )] ListPages { #[arg(short, long)] /// Flatten all pages and don't show their category names flatten: bool, + #[arg(short, long, value_delimiter = ',')] + /// Only show pages in these categories + categories: Vec, #[arg(short, long)] - /// Only show pages in this category - category: Option, - #[arg(short, long)] - /// Use different file to read pages from + /// Use a different file to read pages from page_file: Option, }, #[command( about = "List all categories from the ArchWiki that have been downloaded", - long_about = "List categories from the ArchWiki that have been downloaded. See 'sync-wiki' for information on downloading." + long_about = "List categories from the ArchWiki that have been downloaded. 
See 'sync-wiki' for information on downloading" )] ListCategories { #[arg(short, long)] - /// Use different file to read pages from + /// Use a different file to read pages from page_file: Option, }, #[command( about = "List all languages that the ArchWiki supports", - long_about = "List all languages that the ArchWiki supports." + long_about = "List all languages that the ArchWiki supports" )] ListLanguages, #[command( - about = "Download the names of all pages on the ArchWiki", - long_about = "Download the names of all pages on the ArchWiki. Page names are used for the 'list-pages' and 'list-categories' commands" + about = "Download information about the pages and categories on the ArchWiki", + long_about = "Download information about the pages and categories on the ArchWiki. Page and category names are used for the 'list-pages' and 'list-categories' sub-commands" )] SyncWiki { #[arg(short = 'H', long)] /// Hide progress indicators hide_progress: bool, #[arg(short, long)] - /// Number of threads to use for fetching data from the ArchWiki + /// Print result to stdout instead of writing to a file. Output is formatted as YAML + print: bool, + #[arg(short, long)] + /// Use custom output file location + out_file: Option, }, + #[command( + about = "Download a copy of the ArchWiki. Will take a long time :)", + long_about = "Download a copy of the ArchWiki. Will take a long time :). The exact hierarchy of the wiki is not maintained; sub-categories are put at the top level of the wiki directory" + )] + LocalWiki { + #[arg(short, long)] + /// Number of threads to use for fetching pages from the ArchWiki. If not provided, the + /// number of physical cores is used thread_count: Option, #[arg(short, long)] - /// Maximum amount of categories to fetch. If no value if provided all categories are - /// fetched. - max_categories: Option, + /// Use a different file to read pages from + page_file: Option, + #[arg(short = 'H', long)] + /// Hide progress indicators + hide_progress: bool, #[arg(short, long)] - /// First category that will be fetched. See 'https://wiki.archlinux.org/index.php?title=Special:Categories' for more information. - start_at: Option, + /// Show URLs in plain-text files + show_urls: bool, #[arg(short, long)] - /// Print result to stdout instead of writing to a file. Output is formatted as YAML. - print: bool, + /// Override already downloaded files + override_existing_files: bool, + #[arg(short, long, value_enum, default_value_t = PageFormat::PlainText)] + /// The format that the page should be displayed in + format: PageFormat, + /// Location to store the local copy of the wiki at + location: PathBuf, }, #[command( about = "Retrieve information related to this tool", - long_about = "Retrive information related to this tool. All Info is shown by default."
+ long_about = "Retrive information related to this tool" )] Info { #[arg(short = 'c', long)] - /// Location of the cache directory show_cache_dir: bool, #[arg(short = 'd', long)] - /// Location of the data directory show_data_dir: bool, #[arg(short, long)] - /// Only show values and not the properties they belong to or their descriptions only_values: bool, }, } diff --git a/src/error.rs b/src/error.rs index 4cc13cd6ccaa0d505edd3e1065eaed4868888064..c7294064543a8f5a9bc509a95d17383dd1a5b676 100644 --- a/src/error.rs +++ b/src/error.rs @@ -48,6 +48,4 @@ pub enum WikiError { InvalidApiResponse(InvalidApiResponseError), #[error("{}", .0)] NoPageFound(String), - #[error("The category '{}' could not be found", .0)] - NoCategoryFound(String), } diff --git a/src/formats/html.rs b/src/formats/html.rs index 090cafd3dace04734b3426193155765ad4993f01..6ceca4cad042b90255c14ef249ff2487be469144 100644 --- a/src/formats/html.rs +++ b/src/formats/html.rs @@ -1,41 +1,39 @@ -use scraper::Html; - -use crate::utils::get_page_content; +use scraper::{Html, Selector}; /// Converts the body of the ArchWiki page to a HTML string pub fn convert_page_to_html(document: &Html, page: &str) -> String { - let content = get_page_content(document).expect("page should have content"); - + let body_selector = Selector::parse("body").expect("body should be a valid css selector"); format!( "

{heading}

\n{body}", heading = page, - body = content.html() + body = document + .select(&body_selector) + .next() + .map(|body| body.inner_html()) + .unwrap_or_default() ) } #[cfg(test)] mod tests { use super::*; - use crate::utils::PAGE_CONTENT_CLASS; use pretty_assertions::assert_eq; #[tokio::test] async fn test_convert_page_to_html() { let page = "test page"; - let input = format!( - r#"
+ let input = r#"
Hello, world! -
"# - ); +
"#; let expected_output = format!( r#"

{page}

-
+
Hello, world!
"# ); - let document = Html::parse_document(&input); + let document = Html::parse_document(input); let output = convert_page_to_html(&document, page); assert_eq!(output, expected_output); diff --git a/src/formats/markdown.rs b/src/formats/markdown.rs index c36449d03b8bdf5ed615292ab1d90c21202cb6ca..23a2ba90ccc4d9c1427ee9574896ef5eb78b3a22 100644 --- a/src/formats/markdown.rs +++ b/src/formats/markdown.rs @@ -1,29 +1,22 @@ use scraper::Html; -use crate::utils::get_page_content; - /// Converts the body of the ArchWiki page to a Markdown string pub fn convert_page_to_markdown(document: &Html, page: &str) -> String { - let content = get_page_content(document).expect("page should have content"); - - let md = html2md::parse_html(&content.html()); + let md = html2md::parse_html(&document.html()); format!("# {heading}\n\n{body}", heading = page, body = md) } #[cfg(test)] mod tests { use super::*; - use crate::utils::PAGE_CONTENT_CLASS; use pretty_assertions::assert_eq; #[tokio::test] async fn test_convert_page_to_markdown() { let page = "test page"; - let input = format!( - r#"
-

Hello, world!

-
"# - ); + let input = r#"
+

Hello, world!

+
"#; let expected_output = format!( r#"# {page} @@ -31,7 +24,7 @@ mod tests { ### Hello, world! ###"# ); - let document = Html::parse_document(&input); + let document = Html::parse_document(input); let output = convert_page_to_markdown(&document, page); assert_eq!(output, expected_output); diff --git a/src/formats/plain_text.rs b/src/formats/plain_text.rs index 6b7b6a2818f1cd32fd6af65fcaef470c4c1bbddb..754d4f95d6daf9d3ac4412ac7ecafb6bc2d1e0c2 100644 --- a/src/formats/plain_text.rs +++ b/src/formats/plain_text.rs @@ -2,14 +2,13 @@ use colored::Colorize; use ego_tree::NodeRef; use scraper::{Html, Node}; -use crate::utils::{extract_tag_attr, get_page_content, HtmlTag}; +use crate::utils::extract_tag_attr; /// Converts the body of the ArchWiki page to a plain text string, removing all tags and /// only leaving the text node content. URLs can be shown in a markdown like syntax. pub fn convert_page_to_plain_text(document: &Html, show_urls: bool) -> String { - let content = get_page_content(document).expect("page should have content"); - - content + document + .root_element() .children() .map(|node| format_children(node, show_urls)) .collect::>() @@ -30,7 +29,7 @@ pub fn format_children(node: NodeRef, show_urls: bool) -> String { if show_urls { wrap_text_in_url( &child_text, - &extract_tag_attr(e, &HtmlTag::A, "href").unwrap_or("".to_string()), + &extract_tag_attr(e, "a", "href").unwrap_or("".to_string()), ) } else { child_text @@ -86,41 +85,34 @@ fn wrap_text_in_url(text: &str, url: &str) -> String { #[cfg(test)] mod tests { use super::*; - use crate::utils::PAGE_CONTENT_CLASS; use pretty_assertions::assert_eq; #[tokio::test] async fn test_convert_page_to_plain_text() { { - let input = format!( - r#"
-

Hello, world!

-
how are you
- I'm great -
"# - ); - - let expected_output = format!( - r#" - Hello, world! - how are you - I'm great -"# - ); - - let document = Html::parse_document(&input); + let input = r#" +

Hello, world!

+
how are you
+ I'm great +
"#; + + let expected_output = r#" + Hello, world! + how are you + I'm great + "#; + + let document = Html::parse_document(input); let output = convert_page_to_plain_text(&document, false); assert_eq!(output, expected_output); } { - let input = format!( - r#"
+ let input = r#"

Hello, world!

example -
"# - ); +
"#; let expected_output = format!( r#" diff --git a/src/main.rs b/src/main.rs index 493cb2a7802b7aa029c83a95fb2cf628d76f4ae7..9ccc9cf3d6748012e8692269afe719708e772d7b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,22 +1,24 @@ -use std::{collections::HashMap, fs}; +use std::fs; use clap::Parser; use cli::{CliArgs, Commands}; use directories::BaseDirs; use error::WikiError; use formats::plain_text::convert_page_to_plain_text; + use itertools::Itertools; -use scraper::Html; -use url::Url; -use wiki_api::fetch_page_by_url; use crate::{ - categories::{fetch_all_pages, list_pages}, + categories::list_pages, formats::{html::convert_page_to_html, markdown::convert_page_to_markdown, PageFormat}, languages::{fetch_all_langs, format_lang_table}, search::{format_open_search_table, format_text_search_table, open_search_to_page_url_tupel}, - utils::{create_cache_page_path, get_page_content, page_cache_exists, read_pages_file_as_str}, + utils::{ + create_cache_page_path, page_cache_exists, read_pages_file_as_category_tree, + UNCATEGORIZED_KEY, + }, wiki_api::{fetch_open_search, fetch_page, fetch_text_search}, + wiki_download::{download_wiki, sync_wiki_info}, }; mod categories; @@ -27,6 +29,7 @@ mod languages; mod search; mod utils; mod wiki_api; +mod wiki_download; const PAGE_FILE_NAME: &str = "pages.yml"; @@ -47,8 +50,10 @@ async fn main() -> Result<(), WikiError> { let cache_dir = base_dir.cache_dir().join("archwiki-rs"); let data_dir = base_dir.data_local_dir().join("archwiki-rs"); + let log_dir = data_dir.join("logs"); fs::create_dir_all(&cache_dir)?; fs::create_dir_all(&data_dir)?; + fs::create_dir_all(&log_dir)?; let default_page_file_path = data_dir.join(PAGE_FILE_NAME); @@ -69,7 +74,7 @@ async fn main() -> Result<(), WikiError> { let out = if use_cached_page { fs::read_to_string(&page_cache_path)? } else { - match fetch_document(&page, lang.as_deref()).await { + match fetch_page(&page, lang.as_deref()).await { Ok(document) => match format { PageFormat::PlainText => convert_page_to_plain_text(&document, show_urls), PageFormat::Markdown => convert_page_to_markdown(&document, &page), @@ -119,34 +124,35 @@ async fn main() -> Result<(), WikiError> { } Commands::ListPages { flatten, - category, + categories, page_file, } => { - let path = page_file.unwrap_or(default_page_file_path); - let file = read_pages_file_as_str(path)?; - - let pages_map: HashMap> = serde_yaml::from_str(&file)?; - - let out = if let Some(category) = category { - pages_map - .get(&category) - .ok_or(WikiError::NoCategoryFound(category))? 
- .iter() - .sorted() - .join("\n") - } else { - list_pages(&pages_map, flatten) - }; + let (path, is_default) = page_file + .map(|path| (path, false)) + .unwrap_or((default_page_file_path, true)); + + let wiki_tree = read_pages_file_as_category_tree(&path, is_default)?; + let out = list_pages( + &wiki_tree, + (!categories.is_empty()).then_some(&categories), + flatten, + ); println!("{out}"); } Commands::ListCategories { page_file } => { - let path = page_file.unwrap_or(default_page_file_path); - let file = read_pages_file_as_str(path)?; - - let pages_map: HashMap> = serde_yaml::from_str(&file)?; + let (path, is_default) = page_file + .map(|path| (path, false)) + .unwrap_or((default_page_file_path, true)); + + let wiki_tree = read_pages_file_as_category_tree(&path, is_default)?; + let out = wiki_tree + .keys() + .unique() + .sorted() + .filter(|cat| cat.as_str() != UNCATEGORIZED_KEY) + .join("\n"); - let out = pages_map.keys().unique().sorted().join("\n"); println!("{out}"); } Commands::ListLanguages => { @@ -157,31 +163,40 @@ async fn main() -> Result<(), WikiError> { } Commands::SyncWiki { hide_progress, - thread_count, - max_categories, - start_at, print, + out_file, } => { - let thread_count = thread_count.unwrap_or(num_cpus::get_physical()); - let res = fetch_all_pages( - hide_progress, - thread_count, - max_categories, - start_at.as_deref(), - ) - .await?; + let path = out_file.unwrap_or(default_page_file_path); + sync_wiki_info(&path, print, hide_progress).await?; + } + Commands::LocalWiki { + location, + format, + page_file, + thread_count, + show_urls, + override_existing_files, + hide_progress, + } => { + let thread_count = thread_count.unwrap_or(num_cpus::get_physical()).max(1); - let out = serde_yaml::to_string(&res)?; + let (path, is_default) = page_file + .map(|path| (path, false)) + .unwrap_or((default_page_file_path, true)); - if !print { - fs::write(&default_page_file_path, out)?; + let wiki_tree = read_pages_file_as_category_tree(&path, is_default)?; - if !hide_progress { - println!("data saved to {}", default_page_file_path.to_string_lossy()); - } - } else { - println!("{out}"); - } + download_wiki( + wiki_tree, + format, + location, + &log_dir, + thread_count, + override_existing_files, + hide_progress, + show_urls, + ) + .await?; } Commands::Info { show_cache_dir, @@ -232,19 +247,3 @@ async fn main() -> Result<(), WikiError> { Ok(()) } - -async fn fetch_document(page: &str, lang: Option<&str>) -> Result { - match Url::parse(page) { - Ok(url) => { - let document = fetch_page_by_url(url).await?; - if get_page_content(&document).is_none() { - return Err(WikiError::NoPageFound( - "page is not a valid ArchWiki page".to_owned(), - )); - } - - Ok(document) - } - Err(_) => fetch_page(page, lang).await, - } -} diff --git a/src/search.rs b/src/search.rs index 15e85e2c6267c6cbd5e8335ffa0b2287a99c3ef6..6af0f923a41becca71c5df21ca80986b3e299263 100644 --- a/src/search.rs +++ b/src/search.rs @@ -131,43 +131,26 @@ pub fn open_search_to_page_names( } } -/// Checks if the open search result contains a name that exactly matches the provided page name. -/// If there is a match the corresponding page URL is returned. 
-pub fn open_search_get_exact_match_url( - page: &str, +/// Return provided page name if the top search result exactly matches it +pub fn open_search_is_page_exact_match<'a>( + page: &'a str, search_result: &[OpenSearchItem], -) -> Result, WikiError> { +) -> Result, WikiError> { use crate::error::InvalidApiResponseError as IAR; let page_names = search_result.get(1).ok_or(WikiError::InvalidApiResponse( IAR::OpenSearchMissingNthElement(1), ))?; - let page_urls = search_result.get(3).ok_or(WikiError::InvalidApiResponse( - IAR::OpenSearchMissingNthElement(3), - ))?; - let OpenSearchItem::Array(names) = page_names else { return Err(WikiError::InvalidApiResponse( IAR::OpenSearchNthElementShouldBeArray(1), - )) + )); }; - let OpenSearchItem::Array(urls) = page_urls else { - return Err(WikiError::InvalidApiResponse( - IAR::OpenSearchNthElementShouldBeArray(3), - )) - }; - - if let Some(name) = names.first() { - if name == page { - Ok(urls.first().cloned()) - } else { - Ok(None) - } - } else { - Ok(None) - } + Ok(names + .first() + .and_then(|name| (name == page).then_some(page))) } #[cfg(test)] diff --git a/src/utils.rs b/src/utils.rs index b670f00d47d928cdc6fba5048031746661baf360..89288bc54ae98dc86da5e4b1be8586724d7a200c 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -1,32 +1,16 @@ use std::{ + collections::HashMap, fs, io::{self, ErrorKind}, path::{Path, PathBuf}, }; -use ego_tree::NodeRef; -use regex::Regex; -use scraper::{node::Element, ElementRef, Html, Node, Selector}; +use itertools::Itertools; +use scraper::node::Element; use crate::{error::WikiError, formats::PageFormat}; -pub const PAGE_CONTENT_CLASS: &str = "mw-parser-output"; - -pub enum HtmlTag { - A, - Ul, - Li, -} - -impl HtmlTag { - pub fn name(&self) -> String { - match *self { - HtmlTag::A => "a".to_owned(), - HtmlTag::Ul => "ul".to_owned(), - HtmlTag::Li => "li".to_owned(), - } - } -} +pub const UNCATEGORIZED_KEY: &str = "Uncategorized"; /// Construct a path to cache a page. Different page formats are cached separately. 
/// All none word characters are escaped with an '_' @@ -63,32 +47,8 @@ pub fn page_cache_exists( Ok(secs_since_modified < fourteen_days) } -/// Selects the body of an ArchWiki page -pub fn get_page_content(document: &Html) -> Option> { - let class = format!(".{PAGE_CONTENT_CLASS}"); - let selector = - Selector::parse(&class).unwrap_or_else(|_| panic!("{class} should be valid selector")); - document.select(&selector).next() -} - -pub fn get_elements_by_tag<'a>(root: NodeRef<'a, Node>, tag: &HtmlTag) -> Vec> { - root.children() - .flat_map(|n| { - if let Node::Element(e) = n.value() { - if e.name() == tag.name() { - Some(n) - } else { - None - } - } else { - None - } - }) - .collect() -} - -pub fn extract_tag_attr(element: &Element, tag: &HtmlTag, attr: &str) -> Option { - if element.name() == tag.name() { +pub fn extract_tag_attr(element: &Element, tag: &str, attr: &str) -> Option { + if element.name() == tag { element.attr(attr).map(|attr| attr.to_owned()) } else { None @@ -106,18 +66,103 @@ pub fn update_relative_urls(html: &str, base_url: &str) -> String { .replace("poster=\"/", &format!("poster=\"{base_url}/")) } -pub fn read_pages_file_as_str(path: PathBuf) -> Result { - fs::read_to_string(&path).map_err(|err| { +pub fn read_pages_file_as_category_tree( + path: &Path, + is_default_path: bool, +) -> Result>, WikiError> { + let content = fs::read_to_string(path).map_err(|err| { match err.kind() { - ErrorKind::NotFound => WikiError::IO(io::Error::new(ErrorKind::NotFound, format!("Could not find pages file at '{}'. Try running 'archwiki-rs sync-wiki' to create the missing file.", path.to_string_lossy()))), + ErrorKind::NotFound => { + let path_str = path.to_string_lossy(); + let extra_path_arg = if is_default_path { + String::new() + } else { + format!(" --out-file {path_str}") + }; + + WikiError::IO(io::Error::new(ErrorKind::NotFound, format!("Could not find pages file at '{path_str}'. Try running 'archwiki-rs sync-wiki{extra_path_arg}' to create the missing file." 
))) + } _ => err.into() } - }) + })?; + + let page_to_category_map: HashMap> = serde_yaml::from_str(&content)?; + + let mut category_to_page_map = HashMap::new(); + let mut uncategorized_pages = vec![]; + + for (page, cats) in page_to_category_map.into_iter().collect_vec() { + if cats.is_empty() { + uncategorized_pages.push(page) + } else { + for cat in cats { + let mut pages: Vec = + category_to_page_map.get(&cat).cloned().unwrap_or_default(); + pages.push(page.clone()); + + category_to_page_map.insert(cat, pages); + } + } + } + + if !uncategorized_pages.is_empty() { + for (i, uncategoriesed_chunk) in uncategorized_pages + .into_iter() + .sorted() + .chunks(500) + .into_iter() + .enumerate() + { + let key = format!("{UNCATEGORIZED_KEY} #{n}", n = i + 1); + category_to_page_map.insert(key, uncategoriesed_chunk.collect_vec()); + } + } + + Ok(category_to_page_map) +} + +pub fn to_save_file_name(page: &str) -> String { + sanitize_filename::sanitize(page) +} + +pub fn truncate_unicode_str(n: usize, text: &str) -> String { + let mut count = 0; + let mut res = vec![]; + let mut chars = text.chars(); + + while count < n { + if let Some(char) = chars.next() { + count += unicode_width::UnicodeWidthChar::width(char).unwrap_or(0); + res.push(char); + } else { + break; + } + } + + res.into_iter().collect::() +} + +pub fn page_path(page: &str, format: &PageFormat, parent_dir: &Path) -> PathBuf { + let ext = match format { + PageFormat::PlainText => "", + PageFormat::Markdown => "md", + PageFormat::Html => "html", + }; + + parent_dir.join(to_save_file_name(page)).with_extension(ext) } -fn to_save_file_name(page: &str) -> String { - let regex = Regex::new("[^-0-9A-Za-z_]").expect("'[^0-9A-Za-z_]' should be a valid regex"); - regex.replace_all(page, "_").to_string() +pub fn create_dir_if_not_exists(dir: &Path) -> Result<(), WikiError> { + match fs::create_dir(dir) { + Ok(_) => {} + Err(err) => { + if err.kind() != io::ErrorKind::AlreadyExists { + return Err(err.into()); + } + } + } + + Ok(()) } #[cfg(test)] @@ -129,10 +174,10 @@ mod tests { fn test_to_save_file_name() { let cases = [ ("Neovim", "Neovim"), - ("3D Mouse", "3D_Mouse"), - ("/etc/fstab", "_etc_fstab"), - (".NET", "_NET"), - ("ASUS MeMO Pad 7 (ME176C(X))", "ASUS_MeMO_Pad_7__ME176C_X__"), + ("3D Mouse", "3D Mouse"), + ("/etc/fstab", "etcfstab"), + (".NET", ".NET"), + ("ASUS MeMO Pad 7 (ME176C(X))", "ASUS MeMO Pad 7 (ME176C(X))"), ]; for (input, output) in cases { diff --git a/src/wiki_api.rs b/src/wiki_api.rs index 3c3875878b34b0a62931547a7384cdf362007754..e6811909477f58d0c4979f2a94b4ef22618794bd 100644 --- a/src/wiki_api.rs +++ b/src/wiki_api.rs @@ -1,20 +1,41 @@ +use std::collections::HashMap; + use scraper::Html; +use serde::Deserialize; use url::Url; use crate::{ error::WikiError, search::{ - open_search_get_exact_match_url, open_search_to_page_names, OpenSearchItem, + open_search_is_page_exact_match, open_search_to_page_names, OpenSearchItem, TextSearchApiResponse, TextSearchItem, }, utils::update_relative_urls, }; +const BLOCK_LISTED_CATEGORY_PREFIXES: &[&str] = &[ + "Pages flagged with", + "Sections flagged with", + "Pages or sections flagged with", + "Pages where template include size is exceeded", + "Pages with broken package links", + "Pages with broken section links", + "Pages with missing package links", + "Pages with missing section links", + "Pages with dead links", +]; + #[derive(Debug, Clone, serde::Deserialize)] pub struct ApiResponse { pub query: T, } +#[derive(Debug, Clone, serde::Deserialize)] +pub struct 
ApiResponseWithContinue { + pub query: T, + pub r#continue: Option, +} + pub async fn fetch_open_search( search: &str, lang: &str, @@ -25,7 +46,10 @@ pub async fn fetch_open_search( let res: Vec = serde_json::from_str(&body)?; // the first item in the response should be the search term - debug_assert_eq!(res.first(), Some(&OpenSearchItem::Single(search.to_owned()))); + debug_assert_eq!( + res.first(), + Some(&OpenSearchItem::Single(search.to_owned())) + ); Ok(res) } @@ -46,40 +70,40 @@ pub async fn fetch_text_search( Ok(res.query.search) } -/// Gets an ArchWiki pages entire content. Also updates all relative URLs to absolute URLs. -/// `/title/Neovim` -> `https://wiki.archlinux.org/title/Neovim` +/// Gets the HTML content of an ArchWiki page. /// -/// If the ArchWiki page doesn't have exists the top 5 pages that are most +/// If the ArchWiki page doesn't exists the top 5 pages that are most /// like the page that was given as an argument are returned as a `NoPageFound` error. pub async fn fetch_page(page: &str, lang: Option<&str>) -> Result { let lang = lang.unwrap_or("en"); - let search_res = fetch_open_search(page, lang, 5).await?; - let Some(url) = open_search_get_exact_match_url(page, &search_res)? else { + let Some(page_title) = open_search_is_page_exact_match(page, &search_res)? else { let similar_pages = open_search_to_page_names(&search_res)?; return Err(WikiError::NoPageFound(similar_pages.join("\n"))); }; - let parsed_url = Url::parse(&url) - .unwrap_or(Url::parse("https://wiki.archlinux.org").expect("should be a valid URL")); - let base_url = format!( - "{schema}://{host}", - schema = parsed_url.scheme(), - host = parsed_url.host_str().unwrap_or("") - ); + fetch_page_without_recommendations(page_title).await +} - let body = reqwest::get(&url).await?.text().await?; - let body_with_abs_urls = update_relative_urls(&body, &base_url); +/// Gets the HTML content of an ArchWiki page. +pub async fn fetch_page_without_recommendations(page: &str) -> Result { + let raw_url = format!( + "https://wiki.archlinux.org/rest.php/v1/page/{title}/html", + title = urlencoding::encode(page) + ); - Ok(Html::parse_document(&body_with_abs_urls)) + let url = Url::parse(&raw_url)?; + let document = fetch_page_by_url(url).await?; + Ok(document) } /// Gets an ArchWiki pages entire content. Also updates all relative URLs to absolute URLs. -/// `/title/Neovim` -> `https://wiki.archlinux.org/title/Neovim` +/// `/title/Neovim` -> `https://wiki.archlinux.org/title/Neovim`. +/// A different base URL is used for pages that aren't hosted directly on `wiki.archlinux.org` /// /// If the page has no content a `NoPageFound` Error is returned. -pub async fn fetch_page_by_url(url: Url) -> Result { +async fn fetch_page_by_url(url: Url) -> Result { let base_url = format!( "{schema}://{host}", schema = url.scheme(), @@ -91,3 +115,94 @@ pub async fn fetch_page_by_url(url: Url) -> Result { Ok(Html::parse_document(&body_with_abs_urls)) } + +/// Gets the names of all pages on the ArchWiki and the categories that they belong to. 
+/// +/// ### Example +/// +/// ```sh +/// Wine # page name +/// - Emulation # category +/// - Gaming # category +/// ``` +pub async fn fetch_all_pages() -> Result>, WikiError> { + #[derive(Debug, Deserialize)] + struct ApiAllPagesQuery { + pages: HashMap, + } + + #[derive(Debug, Deserialize)] + struct Page { + title: String, + categories: Option>, + } + + #[derive(Debug, Deserialize)] + struct Category { + title: String, + } + + impl From for String { + fn from(value: Category) -> Self { + value + .title + .split_once("Category:") + .map(|(_, title)| title.to_owned()) + .unwrap_or(value.title) + } + } + + #[derive(Debug, Deserialize)] + struct ApiAllPageContinueParams { + gapcontinue: Option, + clcontinue: Option, + } + + let api_url = + "https://wiki.archlinux.org/api.php?action=query&generator=allpages&prop=categories&format=json&gaplimit=max&cllimit=max"; + + let mut pages: Vec = vec![]; + + let body = reqwest::get(api_url).await?.text().await?; + let mut api_resp: ApiResponseWithContinue = + serde_json::from_str(&body)?; + + pages.append(&mut api_resp.query.pages.into_values().collect()); + + while let Some(continue_params) = api_resp.r#continue { + let next_api_url = if let Some(gapcontinue) = continue_params.gapcontinue { + format!("{api_url}&gapcontinue={}", gapcontinue) + } else if let Some(clcontinue) = continue_params.clcontinue { + format!("{api_url}&clcontinue={}", clcontinue) + } else { + break; + }; + + let body = reqwest::get(&next_api_url).await?.text().await?; + api_resp = serde_json::from_str(&body)?; + + pages.append(&mut api_resp.query.pages.into_values().collect()); + } + + let page_category_tree = pages.into_iter().map(|page| { + ( + page.title, + page.categories + .map(|cats| { + cats.into_iter() + .map::(Into::into) + .filter(|cat| !is_blocked_category(cat)) + .collect() + }) + .unwrap_or_default(), + ) + }); + + Ok(HashMap::from_iter(page_category_tree)) +} + +fn is_blocked_category(category: &str) -> bool { + BLOCK_LISTED_CATEGORY_PREFIXES + .iter() + .any(|blocked_prefix| category.starts_with(blocked_prefix)) +} diff --git a/src/wiki_download.rs b/src/wiki_download.rs new file mode 100644 index 0000000000000000000000000000000000000000..8137b4cbf59b7d6ff9baaf87e6f47d41f574698b --- /dev/null +++ b/src/wiki_download.rs @@ -0,0 +1,286 @@ +use std::{ + collections::HashMap, + fs, + path::{Path, PathBuf}, + sync::Arc, +}; + +use super::formats::plain_text::convert_page_to_plain_text; + +use clap::{builder::PossibleValue, ValueEnum}; +use futures::future; +use indicatif::{MultiProgress, ProgressBar, ProgressStyle}; +use itertools::Itertools; + +use crate::{ + error::WikiError, + formats::{html::convert_page_to_html, markdown::convert_page_to_markdown, PageFormat}, + utils::truncate_unicode_str, + utils::{create_dir_if_not_exists, page_path, to_save_file_name}, + wiki_api::fetch_all_pages, + wiki_api::fetch_page_without_recommendations, +}; + +pub async fn sync_wiki_info( + page_path: &Path, + print: bool, + hide_progress: bool, +) -> Result<(), WikiError> { + let spinner = ProgressBar::new_spinner(); + if hide_progress { + spinner.finish_and_clear(); + } + + let _spin_task = std::thread::spawn(move || loop { + spinner.tick(); + std::thread::sleep(std::time::Duration::from_millis(100)); + }); + + let wiki_tree = fetch_all_pages().await?; + let out = serde_yaml::to_string(&wiki_tree)?; + + if !print { + fs::write(page_path, out)?; + + if !hide_progress { + println!("data saved to {}", page_path.to_string_lossy()); + } + } else { + println!("{out}"); + } + + Ok(()) +} + 
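For reference, the page file that `sync_wiki_info` writes is a flat page-to-categories map, which `read_pages_file_as_category_tree` later inverts into the category-to-pages tree that `download_wiki` consumes. A standalone sketch of that shape (illustrative only; the two YAML entries are made up, the real file holds thousands):

```rust
use std::collections::HashMap;

// Hypothetical excerpt of the YAML written by `sync_wiki_info`
// (page name -> categories it belongs to).
const EXAMPLE_PAGES_YML: &str = "
Wine:
  - Emulation
  - Gaming
Neovim:
  - Text editors
";

fn main() -> Result<(), serde_yaml::Error> {
    let page_to_categories: HashMap<String, Vec<String>> =
        serde_yaml::from_str(EXAMPLE_PAGES_YML)?;

    // Invert the map into category -> pages, the shape download_wiki works with.
    let mut category_to_pages: HashMap<String, Vec<String>> = HashMap::new();
    for (page, categories) in page_to_categories {
        for category in categories {
            category_to_pages
                .entry(category)
                .or_default()
                .push(page.clone());
        }
    }

    assert_eq!(
        category_to_pages.get("Gaming"),
        Some(&vec!["Wine".to_string()])
    );
    Ok(())
}
```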
+#[allow(clippy::too_many_arguments)] +pub async fn download_wiki( + wiki_tree: HashMap>, + format: PageFormat, + location: PathBuf, + log_dir: &Path, + thread_count: usize, + override_exisiting_files: bool, + hide_progress: bool, + show_urls: bool, +) -> Result<(), WikiError> { + create_dir_if_not_exists(&location)?; + + let total_page_count = wiki_tree.values().map(|pages| pages.len()).sum::(); + + if !hide_progress { + if let Some(format) = format + .to_possible_value() + .as_ref() + .map(PossibleValue::get_name) + { + println!("downloading {total_page_count} pages as {format}\n",) + } + } + + let multibar = MultiProgress::new(); + + let category_count = wiki_tree.values().filter(|v| !v.is_empty()).count(); + let category_bar = multibar.add( + ProgressBar::new(category_count.try_into().unwrap_or(0)) + .with_prefix("---FETCHING CATEGORIES---") + .with_style( + ProgressStyle::with_template("[{prefix:^40}]\t {pos:>4}/{len:4}") + .unwrap() + .progress_chars("##-"), + ), + ); + + if hide_progress { + category_bar.finish_and_clear(); + } + + let wiki_tree_without_empty_cats = wiki_tree + .into_iter() + .filter(|(_, p)| !p.is_empty()) + .collect_vec(); + + let format = Arc::new(format); + let location = Arc::new(location); + let multibar = Arc::new(multibar); + let catbar = Arc::new(category_bar); + + let wiki_tree_chunks = + chunk_wiki_with_even_page_distribution(wiki_tree_without_empty_cats, thread_count); + + let tasks = wiki_tree_chunks + .into_iter() + .map(|chunk| { + let format_ref = Arc::clone(&format); + let location_ref = Arc::clone(&location); + let multibar_ref = Arc::clone(&multibar); + let catbar_ref = Arc::clone(&catbar); + + tokio::spawn(async move { + download_wiki_chunk( + &chunk, + &format_ref, + &location_ref, + hide_progress, + show_urls, + override_exisiting_files, + &multibar_ref, + &catbar_ref, + ) + .await + }) + }) + .collect_vec(); + + let results = future::join_all(tasks).await; + let mut all_failed_fetches = vec![]; + + for result in results { + match result { + Ok(Ok(mut failed_fetches)) => all_failed_fetches.append(&mut failed_fetches), + Ok(Err(thread_err)) => { + eprintln!( + "ERROR: a thread panicked, some pages might be missing\nREASON: {thread_err}" + ); + } + Err(_) => { + eprintln!("ERROR: failed to join threads, some pages might be missing"); + } + } + } + + if !hide_progress { + let successfully_fetched_pages = total_page_count - all_failed_fetches.len(); + println!("downloaded {successfully_fetched_pages} pages successfully"); + } + + if !all_failed_fetches.is_empty() { + if !hide_progress { + println!("failed to download {} pages", all_failed_fetches.len()); + } + + let failed_fetches_str = all_failed_fetches + .into_iter() + .map(|(page, err)| format!("failed to fetch page '{page}'\nREASON: {err}")) + .collect_vec() + .join("\n\n"); + + let path = log_dir.join("local-wiki-download-err.log"); + let write = fs::write(&path, failed_fetches_str); + + if write.is_ok() && !hide_progress { + println!("error log written to '{}'", path.to_string_lossy()); + } + } + + if !hide_progress { + println!( + "saved local copy of the ArchWiki to '{}'", + location.to_string_lossy() + ) + } + + Ok(()) +} + +type FailedPageFetches = Vec<(String, WikiError)>; + +#[allow(clippy::too_many_arguments)] +async fn download_wiki_chunk( + chunk: &[(String, Vec)], + format: &PageFormat, + location: &Path, + hide_progress: bool, + show_urls: bool, + override_exisiting_files: bool, + multibar: &MultiProgress, + catbar: &ProgressBar, +) -> Result { + let mut failed_fetches = vec![]; + + for
(cat, pages) in chunk { + let cat_dir = location.join(to_save_file_name(cat)); + create_dir_if_not_exists(&cat_dir)?; + + let width = unicode_width::UnicodeWidthStr::width(cat.as_str()); + + let leak_str: &'static str = Box::leak( + format!( + " fetching pages in \"{}\"", + if width <= 18 { + truncate_unicode_str(18, cat) + } else { + truncate_unicode_str(15, cat) + "..." + } + ) + .into_boxed_str(), + ); + + let bar = multibar.add( + ProgressBar::new(pages.len().try_into().unwrap_or(0)) + .with_prefix(leak_str) + .with_style( + ProgressStyle::with_template( + "[{prefix:<40}]\t {bar:40.cyan/blue} {pos:>4}/{len:4}", + ) + .unwrap() + .progress_chars("##-"), + ), + ); + + if hide_progress { + bar.finish_and_clear(); + } + + catbar.inc(1); + for page in pages { + bar.inc(1); + + let path = page_path(page, format, &cat_dir); + if override_exisiting_files || !path.exists() { + match write_page_to_local_wiki(page, &path, format, show_urls).await { + Ok(()) => {} + Err(err) => failed_fetches.push((page.to_owned(), err)), + } + } + } + } + + Ok(failed_fetches) +} + +async fn write_page_to_local_wiki( + page: &str, + page_path: &Path, + format: &PageFormat, + show_urls: bool, +) -> Result<(), WikiError> { + let document = fetch_page_without_recommendations(page).await?; + let content = match format { + PageFormat::PlainText => convert_page_to_plain_text(&document, show_urls), + PageFormat::Markdown => convert_page_to_markdown(&document, page), + PageFormat::Html => convert_page_to_html(&document, page), + }; + + fs::write(page_path, content)?; + Ok(()) +} + +fn chunk_wiki_with_even_page_distribution( + wiki_tree: Vec<(String, Vec)>, + chunk_count: usize, +) -> Vec)>> { + let mut chunks: Vec)>> = (0..chunk_count).map(|_| vec![]).collect(); + + for entry in wiki_tree { + if let Some(chunk) = chunks.iter_mut().min_by(|a, b| { + let count_a = a.iter().map(|(_, pages)| pages.len()).sum::(); + let count_b = b.iter().map(|(_, pages)| pages.len()).sum::(); + + count_a.cmp(&count_b) + }) { + chunk.push(entry); + } + } + + chunks +} diff --git a/tests/cli.rs b/tests/cli.rs index 5759687dd9ef45459a08a938775e0e2b93d37987..6857dbb3f028ce5d4b021f57e4845b135fca6d76 100644 --- a/tests/cli.rs +++ b/tests/cli.rs @@ -1,9 +1,5 @@ use assert_cmd::Command; -use assert_fs::prelude::{FileWriteStr, PathChild}; -use predicates::{ - prelude::{predicate, PredicateBooleanExt}, - Predicate, -}; +use predicates::prelude::{predicate, PredicateBooleanExt}; #[test] fn test_cli_info_cmd() -> Result<(), Box> { @@ -55,22 +51,6 @@ fn test_cli_read_page_cmd() -> Result<(), Box> { cmd.assert().failure().stderr(pstr::starts_with("Neovim")); } - { - let mut cmd = Command::cargo_bin("archwiki-rs")?; - cmd.args(["read-page", "-i", "https://wiki.archlinux.org/title/Emacs"]); - - cmd.assert() - .success() - .stdout(pstr::contains("Installation")); - } - - { - let mut cmd = Command::cargo_bin("archwiki-rs")?; - cmd.args(["read-page", "-i", "https://google.com"]); - - cmd.assert().failure(); - } - Ok(()) } @@ -121,45 +101,3 @@ fn test_cli_list_languages_cmd() -> Result<(), Box> { Ok(()) } - -#[test] -fn test_cli_local_wiki_info() -> Result<(), Box> { - use predicate::str as pstr; - - let stdout = { - let mut cmd = Command::cargo_bin("archwiki-rs")?; - cmd.args(["sync-wiki", "-p", "-m", "10"]); - - let stdout = String::from_utf8(cmd.assert().success().get_output().stdout.clone()).unwrap(); - pstr::contains("About Arch").eval(&stdout); - - stdout - }; - - let tmp_dir = assert_fs::TempDir::new().unwrap(); - 
tmp_dir.child("pages.yml").write_str(&stdout).unwrap(); - - let tmp_file_path = tmp_dir.path().join("pages.yml"); - - { - let mut cmd = Command::cargo_bin("archwiki-rs")?; - cmd.args(["list-pages", "-p", tmp_file_path.to_str().unwrap()]); - - cmd.assert().success().stdout(pstr::contains( - "About Arch: -───┤Arch boot process -───┤Arch build system", - )); - } - - { - let mut cmd = Command::cargo_bin("archwiki-rs")?; - cmd.args(["list-categories", "-p", tmp_file_path.to_str().unwrap()]); - - cmd.assert() - .success() - .stdout(pstr::contains("\n").count(10)); - } - - Ok(()) -}