diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 2c2ac3f3ff80370bd6bad9fffc9dfb8c8f6931e8..6312f607932fd2241782827cce2ec7968ea3c8b6 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -445,6 +445,7 @@ config LINUX_LINK_BASE
 	  However some customers have peripherals mapped at this addr, so
 	  Linux needs to be scooted a bit.
 	  If you don't know what the above means, leave this setting alone.
+	  This needs to match memory start address specified in Device Tree
 
 config HIGHMEM
 	bool "High Memory Support"
diff --git a/arch/arc/boot/dts/axs10x_mb.dtsi b/arch/arc/boot/dts/axs10x_mb.dtsi
index f3db32154973718268c1998e9f16bb03d1af7e7f..44a578c10732cd1ab79558415e3c66f6825c98bd 100644
--- a/arch/arc/boot/dts/axs10x_mb.dtsi
+++ b/arch/arc/boot/dts/axs10x_mb.dtsi
@@ -46,6 +46,7 @@
 			snps,pbl = < 32 >;
 			clocks = <&apbclk>;
 			clock-names = "stmmaceth";
+			max-speed = <100>;
 		};
 
 		ehci@0x40000 {
diff --git a/arch/arc/boot/dts/nsim_hs.dts b/arch/arc/boot/dts/nsim_hs.dts
index b0eb0e7fe21d8a66b1d0ef5267f24dd6a1f8f254..fc81879bc1f5800e5efe8eb43395eb2c5509f481 100644
--- a/arch/arc/boot/dts/nsim_hs.dts
+++ b/arch/arc/boot/dts/nsim_hs.dts
@@ -17,7 +17,8 @@
 
 	memory {
 		device_type = "memory";
-		reg = <0x0 0x80000000 0x0 0x40000000	/* 1 GB low mem */
+		/* CONFIG_LINUX_LINK_BASE needs to match low mem start */
+		reg = <0x0 0x80000000 0x0 0x20000000	/* 512 MB low mem */
 		       0x1 0x00000000 0x0 0x40000000>;	/* 1 GB highmem */
 	};
 
diff --git a/arch/arc/include/asm/mach_desc.h b/arch/arc/include/asm/mach_desc.h
index 6ff657a904b61e700421b60423991273df93e1e1..c28e6c347b4900217ad48053c69679bb3da8b607 100644
--- a/arch/arc/include/asm/mach_desc.h
+++ b/arch/arc/include/asm/mach_desc.h
@@ -23,7 +23,7 @@
  * @dt_compat:		Array of device tree 'compatible' strings
  * 			(XXX: although only 1st entry is looked at)
  * @init_early:		Very early callback [called from setup_arch()]
- * @init_cpu_smp:	for each CPU as it is coming up (SMP as well as UP)
+ * @init_per_cpu:	for each CPU as it is coming up (SMP as well as UP)
  * 			[(M):init_IRQ(), (o):start_kernel_secondary()]
  * @init_machine:	arch initcall level callback (e.g. populate static
  * 			platform devices or parse Devicetree)
@@ -35,7 +35,7 @@ struct machine_desc {
 	const char		**dt_compat;
 	void			(*init_early)(void);
 #ifdef CONFIG_SMP
-	void			(*init_cpu_smp)(unsigned int);
+	void			(*init_per_cpu)(unsigned int);
 #endif
 	void			(*init_machine)(void);
 	void			(*init_late)(void);
diff --git a/arch/arc/include/asm/smp.h b/arch/arc/include/asm/smp.h
index 133c867d15af0a627576d9faf89c0569220a6f47..991380438d6bfd6af03e6fff4748b8c5342b6a70 100644
--- a/arch/arc/include/asm/smp.h
+++ b/arch/arc/include/asm/smp.h
@@ -48,7 +48,7 @@ extern int smp_ipi_irq_setup(int cpu, int irq);
  * @init_early_smp:	A SMP specific h/w block can init itself
  * 			Could be common across platforms so not covered by
  * 			mach_desc->init_early()
- * @init_irq_cpu:	Called for each core so SMP h/w block driver can do
+ * @init_per_cpu:	Called for each core so SMP h/w block driver can do
  * 			any needed setup per cpu (e.g. IPI request)
  * @cpu_kick:		For Master to kickstart a cpu (optionally at a PC)
  * @ipi_send:		To send IPI to a @cpu
@@ -57,7 +57,7 @@ extern int smp_ipi_irq_setup(int cpu, int irq);
 struct plat_smp_ops {
 	const char 	*info;
 	void		(*init_early_smp)(void);
-	void		(*init_irq_cpu)(int cpu);
+	void		(*init_per_cpu)(int cpu);
 	void		(*cpu_kick)(int cpu, unsigned long pc);
 	void		(*ipi_send)(int cpu);
 	void		(*ipi_clear)(int irq);
diff --git a/arch/arc/include/asm/unwind.h b/arch/arc/include/asm/unwind.h
index 7ca628b6ee2aa6d1e02f6ad01363990439ec10b7..c11a25bb8158dee7f60a5f00ca8ec0addaf54b56 100644
--- a/arch/arc/include/asm/unwind.h
+++ b/arch/arc/include/asm/unwind.h
@@ -112,7 +112,6 @@ struct unwind_frame_info {
 
 extern int arc_unwind(struct unwind_frame_info *frame);
 extern void arc_unwind_init(void);
-extern void arc_unwind_setup(void);
 extern void *unwind_add_table(struct module *module, const void *table_start,
 			      unsigned long table_size);
 extern void unwind_remove_table(void *handle, int init_only);
@@ -152,9 +151,6 @@ static inline void arc_unwind_init(void)
 {
 }
 
-static inline void arc_unwind_setup(void)
-{
-}
 #define unwind_add_table(a, b, c)
 #define unwind_remove_table(a, b)
 
diff --git a/arch/arc/kernel/intc-arcv2.c b/arch/arc/kernel/intc-arcv2.c
index 26c15682747960d3b5b1c412f76dff3c2ebc7780..0394f9f61b466dea018af3ec371c65a8dbc17acb 100644
--- a/arch/arc/kernel/intc-arcv2.c
+++ b/arch/arc/kernel/intc-arcv2.c
@@ -106,10 +106,21 @@ static struct irq_chip arcv2_irq_chip = {
 static int arcv2_irq_map(struct irq_domain *d, unsigned int irq,
 			 irq_hw_number_t hw)
 {
-	if (irq == TIMER0_IRQ || irq == IPI_IRQ)
+	/*
+	 * core intc IRQs [16, 23]:
+	 * Statically assigned always private-per-core (Timers, WDT, IPI, PCT)
+	 */
+	if (hw < 24) {
+		/*
+		 * A subsequent request_percpu_irq() fails if percpu_devid is
+		 * not set. That in turns sets NOAUTOEN, meaning each core needs
+		 * to call enable_percpu_irq()
+		 */
+		irq_set_percpu_devid(irq);
 		irq_set_chip_and_handler(irq, &arcv2_irq_chip, handle_percpu_irq);
-	else
+	} else {
 		irq_set_chip_and_handler(irq, &arcv2_irq_chip, handle_level_irq);
+	}
 
 	return 0;
 }
diff --git a/arch/arc/kernel/irq.c b/arch/arc/kernel/irq.c
index 2ee226546c6a821f739a079326ed92ad52fe16b8..ba17f85285cfe85c53ba6c93db08d620c7109fb9 100644
--- a/arch/arc/kernel/irq.c
+++ b/arch/arc/kernel/irq.c
@@ -29,11 +29,11 @@ void __init init_IRQ(void)
 
 #ifdef CONFIG_SMP
 	/* a SMP H/w block could do IPI IRQ request here */
-	if (plat_smp_ops.init_irq_cpu)
-		plat_smp_ops.init_irq_cpu(smp_processor_id());
+	if (plat_smp_ops.init_per_cpu)
+		plat_smp_ops.init_per_cpu(smp_processor_id());
 
-	if (machine_desc->init_cpu_smp)
-		machine_desc->init_cpu_smp(smp_processor_id());
+	if (machine_desc->init_per_cpu)
+		machine_desc->init_per_cpu(smp_processor_id());
 #endif
 }
 
@@ -51,6 +51,18 @@ void arch_do_IRQ(unsigned int irq, struct pt_regs *regs)
 	set_irq_regs(old_regs);
 }
 
+/*
+ * API called for requesting percpu interrupts - called by each CPU
+ *  - For boot CPU, actually request the IRQ with genirq core + enables
+ *  - For subsequent callers only enable called locally
+ *
+ * Relies on being called by boot cpu first (i.e. request called ahead) of
+ * any enable as expected by genirq. Hence Suitable only for TIMER, IPI
+ * which are guaranteed to be setup on boot core first.
+ * Late probed peripherals such as perf can't use this as there no guarantee
+ * of being called on boot CPU first.
+ */
+
 void arc_request_percpu_irq(int irq, int cpu,
                             irqreturn_t (*isr)(int irq, void *dev),
                             const char *irq_nm,
@@ -60,14 +72,17 @@ void arc_request_percpu_irq(int irq, int cpu,
 	if (!cpu) {
 		int rc;
 
+#ifdef CONFIG_ISA_ARCOMPACT
 		/*
-		 * These 2 calls are essential to making percpu IRQ APIs work
-		 * Ideally these details could be hidden in irq chip map function
-		 * but the issue is IPIs IRQs being static (non-DT) and platform
-		 * specific, so we can't identify them there.
+		 * A subsequent request_percpu_irq() fails if percpu_devid is
+		 * not set. That in turns sets NOAUTOEN, meaning each core needs
+		 * to call enable_percpu_irq()
+		 *
+		 * For ARCv2, this is done in irq map function since we know
+		 * which irqs are strictly per cpu
 		 */
 		irq_set_percpu_devid(irq);
-		irq_modify_status(irq, IRQ_NOAUTOEN, 0);  /* @irq, @clr, @set */
+#endif
 
 		rc = request_percpu_irq(irq, isr, irq_nm, percpu_dev);
 		if (rc)
diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c
index 74a9b074ac3e4e64d97ef8e069f73a9104531682..bd237acdf4f2f9601efbf4c25c45067ddd2b69d9 100644
--- a/arch/arc/kernel/mcip.c
+++ b/arch/arc/kernel/mcip.c
@@ -132,7 +132,7 @@ static void mcip_probe_n_setup(void)
 struct plat_smp_ops plat_smp_ops = {
 	.info		= smp_cpuinfo_buf,
 	.init_early_smp	= mcip_probe_n_setup,
-	.init_irq_cpu	= mcip_setup_per_cpu,
+	.init_per_cpu	= mcip_setup_per_cpu,
 	.ipi_send	= mcip_ipi_send,
 	.ipi_clear	= mcip_ipi_clear,
 };
diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c
index 0c08bb1ce15aab114c344609879eaa86bfa59a9d..8b134cfe5e1f11023b559639497f7e1a35d2ee79 100644
--- a/arch/arc/kernel/perf_event.c
+++ b/arch/arc/kernel/perf_event.c
@@ -428,12 +428,11 @@ static irqreturn_t arc_pmu_intr(int irq, void *dev)
 
 #endif /* CONFIG_ISA_ARCV2 */
 
-void arc_cpu_pmu_irq_init(void)
+static void arc_cpu_pmu_irq_init(void *data)
 {
-	struct arc_pmu_cpu *pmu_cpu = this_cpu_ptr(&arc_pmu_cpu);
+	int irq = *(int *)data;
 
-	arc_request_percpu_irq(arc_pmu->irq, smp_processor_id(), arc_pmu_intr,
-			       "ARC perf counters", pmu_cpu);
+	enable_percpu_irq(irq, IRQ_TYPE_NONE);
 
 	/* Clear all pending interrupt flags */
 	write_aux_reg(ARC_REG_PCT_INT_ACT, 0xffffffff);
@@ -515,7 +514,6 @@ static int arc_pmu_device_probe(struct platform_device *pdev)
 
 	if (has_interrupts) {
 		int irq = platform_get_irq(pdev, 0);
-		unsigned long flags;
 
 		if (irq < 0) {
 			pr_err("Cannot get IRQ number for the platform\n");
@@ -524,24 +522,12 @@ static int arc_pmu_device_probe(struct platform_device *pdev)
 
 		arc_pmu->irq = irq;
 
-		/*
-		 * arc_cpu_pmu_irq_init() needs to be called on all cores for
-		 * their respective local PMU.
-		 * However we use opencoded on_each_cpu() to ensure it is called
-		 * on core0 first, so that arc_request_percpu_irq() sets up
-		 * AUTOEN etc. Otherwise enable_percpu_irq() fails to enable
-		 * perf IRQ on non master cores.
-		 * see arc_request_percpu_irq()
-		 */
-		preempt_disable();
-		local_irq_save(flags);
-		arc_cpu_pmu_irq_init();
-		local_irq_restore(flags);
-		smp_call_function((smp_call_func_t)arc_cpu_pmu_irq_init, 0, 1);
-		preempt_enable();
-
-		/* Clean all pending interrupt flags */
-		write_aux_reg(ARC_REG_PCT_INT_ACT, 0xffffffff);
+		/* intc map function ensures irq_set_percpu_devid() called */
+		request_percpu_irq(irq, arc_pmu_intr, "ARC perf counters",
+				   this_cpu_ptr(&arc_pmu_cpu));
+
+		on_each_cpu(arc_cpu_pmu_irq_init, &irq, 1);
+
 	} else
 		arc_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
 
diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index c33e77c0ad3e9eb60367b6c2510c4e35c60d3b52..e1b87444ea9a0740b9651ad6b68fd39f701121a0 100644
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -429,7 +429,6 @@ void __init setup_arch(char **cmdline_p)
 #endif
 
 	arc_unwind_init();
-	arc_unwind_setup();
 }
 
 static int __init customize_machine(void)
diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c
index 580587805fa302d0d28f7adc89434b920b3be651..ef6e9e15b82abf72d946ab45c3e20096854605c5 100644
--- a/arch/arc/kernel/smp.c
+++ b/arch/arc/kernel/smp.c
@@ -132,11 +132,11 @@ void start_kernel_secondary(void)
 	pr_info("## CPU%u LIVE ##: Executing Code...\n", cpu);
 
 	/* Some SMP H/w setup - for each cpu */
-	if (plat_smp_ops.init_irq_cpu)
-		plat_smp_ops.init_irq_cpu(cpu);
+	if (plat_smp_ops.init_per_cpu)
+		plat_smp_ops.init_per_cpu(cpu);
 
-	if (machine_desc->init_cpu_smp)
-		machine_desc->init_cpu_smp(cpu);
+	if (machine_desc->init_per_cpu)
+		machine_desc->init_per_cpu(cpu);
 
 	arc_local_timer_setup();
 
diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c
index 7352475451f6c3798885c97b04c11ddd53146990..cf2828ab0905fced185c2ffa434595939f3ec19b 100644
--- a/arch/arc/kernel/unwind.c
+++ b/arch/arc/kernel/unwind.c
@@ -170,6 +170,23 @@ static struct unwind_table *find_table(unsigned long pc)
 
 static unsigned long read_pointer(const u8 **pLoc,
 				  const void *end, signed ptrType);
+static void init_unwind_hdr(struct unwind_table *table,
+			    void *(*alloc) (unsigned long));
+
+/*
+ * wrappers for header alloc (vs. calling one vs. other at call site)
+ * to elide section mismatches warnings
+ */
+static void *__init unw_hdr_alloc_early(unsigned long sz)
+{
+	return __alloc_bootmem_nopanic(sz, sizeof(unsigned int),
+				       MAX_DMA_ADDRESS);
+}
+
+static void *unw_hdr_alloc(unsigned long sz)
+{
+	return kmalloc(sz, GFP_KERNEL);
+}
 
 static void init_unwind_table(struct unwind_table *table, const char *name,
 			      const void *core_start, unsigned long core_size,
@@ -209,6 +226,8 @@ void __init arc_unwind_init(void)
 			  __start_unwind, __end_unwind - __start_unwind,
 			  NULL, 0);
 	  /*__start_unwind_hdr, __end_unwind_hdr - __start_unwind_hdr);*/
+
+	init_unwind_hdr(&root_table, unw_hdr_alloc_early);
 }
 
 static const u32 bad_cie, not_fde;
@@ -241,8 +260,8 @@ static void swap_eh_frame_hdr_table_entries(void *p1, void *p2, int size)
 	e2->fde = v;
 }
 
-static void __init setup_unwind_table(struct unwind_table *table,
-				      void *(*alloc) (unsigned long))
+static void init_unwind_hdr(struct unwind_table *table,
+			    void *(*alloc) (unsigned long))
 {
 	const u8 *ptr;
 	unsigned long tableSize = table->size, hdrSize;
@@ -274,13 +293,13 @@ static void __init setup_unwind_table(struct unwind_table *table,
 		const u32 *cie = cie_for_fde(fde, table);
 		signed ptrType;
 
-		if (cie == &not_fde)
+		if (cie == &not_fde)	/* only process FDE here */
 			continue;
 		if (cie == NULL || cie == &bad_cie)
-			return;
+			continue;	/* say FDE->CIE.version != 1 */
 		ptrType = fde_pointer_type(cie);
 		if (ptrType < 0)
-			return;
+			continue;
 
 		ptr = (const u8 *)(fde + 2);
 		if (!read_pointer(&ptr, (const u8 *)(fde + 1) + *fde,
@@ -300,9 +319,11 @@ static void __init setup_unwind_table(struct unwind_table *table,
 
 	hdrSize = 4 + sizeof(unsigned long) + sizeof(unsigned int)
 	    + 2 * n * sizeof(unsigned long);
+
 	header = alloc(hdrSize);
 	if (!header)
 		return;
+
 	header->version = 1;
 	header->eh_frame_ptr_enc = DW_EH_PE_abs | DW_EH_PE_native;
 	header->fde_count_enc = DW_EH_PE_abs | DW_EH_PE_data4;
@@ -322,6 +343,10 @@ static void __init setup_unwind_table(struct unwind_table *table,
 
 		if (fde[1] == 0xffffffff)
 			continue;	/* this is a CIE */
+
+		if (*(u8 *)(cie + 2) != 1)
+			continue;	/* FDE->CIE.version not supported */
+
 		ptr = (const u8 *)(fde + 2);
 		header->table[n].start = read_pointer(&ptr,
 						      (const u8 *)(fde + 1) +
@@ -342,18 +367,6 @@ static void __init setup_unwind_table(struct unwind_table *table,
 	table->header = (const void *)header;
 }
 
-static void *__init balloc(unsigned long sz)
-{
-	return __alloc_bootmem_nopanic(sz,
-				       sizeof(unsigned int),
-				       __pa(MAX_DMA_ADDRESS));
-}
-
-void __init arc_unwind_setup(void)
-{
-	setup_unwind_table(&root_table, balloc);
-}
-
 #ifdef CONFIG_MODULES
 
 static struct unwind_table *last_table;
@@ -377,6 +390,8 @@ void *unwind_add_table(struct module *module, const void *table_start,
 			  table_start, table_size,
 			  NULL, 0);
 
+	init_unwind_hdr(table, unw_hdr_alloc);
+
 #ifdef UNWIND_DEBUG
 	unw_debug("Table added for [%s] %lx %lx\n",
 		module->name, table->core.pc, table->core.range);
@@ -439,6 +454,7 @@ void unwind_remove_table(void *handle, int init_only)
 	info.init_only = init_only;
 
 	unlink_table(&info); /* XXX: SMP */
+	kfree(table->header);
 	kfree(table);
 }
 
@@ -507,7 +523,8 @@ static const u32 *cie_for_fde(const u32 *fde, const struct unwind_table *table)
 
 	if (*cie <= sizeof(*cie) + 4 || *cie >= fde[1] - sizeof(*fde)
 	    || (*cie & (sizeof(*cie) - 1))
-	    || (cie[1] != 0xffffffff))
+	    || (cie[1] != 0xffffffff)
+	    || ( *(u8 *)(cie + 2) != 1))   /* version 1 supported */
 		return NULL;	/* this is not a (valid) CIE */
 	return cie;
 }
diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c
index a9305b5a2cd4ba091f7ae18914f6ef5e64d5236c..7d2c4fbf4f22eb1402bc666c077c652c40f38361 100644
--- a/arch/arc/mm/init.c
+++ b/arch/arc/mm/init.c
@@ -51,7 +51,9 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size)
 	int in_use = 0;
 
 	if (!low_mem_sz) {
-		BUG_ON(base != low_mem_start);
+		if (base != low_mem_start)
+			panic("CONFIG_LINUX_LINK_BASE != DT memory { }");
+
 		low_mem_sz = size;
 		in_use = 1;
 	} else {