This patch is an attempt at introducing 4k CAR size granularity for the generic x86 code. For the old supported CAR sizes, the newly generated code is equivalent, so it should be a no-brainer.
Add a copyright header to the code; the header is derived from the one found in the same piece of code in v3.
Signed-off-by: Carl-Daniel Hailfinger c-d.hailfinger.devel.2006@gmx.net
Index: LinuxBIOSv2-CARx86/src/cpu/x86/car/cache_as_ram.inc =================================================================== --- LinuxBIOSv2-CARx86/src/cpu/x86/car/cache_as_ram.inc (Revision 3026) +++ LinuxBIOSv2-CARx86/src/cpu/x86/car/cache_as_ram.inc (Arbeitskopie) @@ -1,3 +1,28 @@ +/* + * This file is part of the LinuxBIOS project. + * + * Copyright (C) 2000,2007 Ronald G. Minnich rminnich@gmail.com + * Copyright (C) 2005 Eswar Nallusamy, LANL + * Copyright (C) 2005 Tyan + * (Written by Yinghai Lu yhlu@tyan.com for Tyan) + * Copyright (C) 2007 coresystems GmbH + * (Written by Stefan Reinauer stepan@coresystems.de for coresystems GmbH) + * Copyright (C) 2007 Carl-Daniel Hailfinger + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + /* We will use 4K bytes only */ /* disable HyperThreading is done by eswar*/ /* other's is the same as AMD except remove amd specific msr */ @@ -106,39 +131,61 @@ jmp clear_fixed_var_mtrr clear_fixed_var_mtrr_out:
-#if CacheSize == 0x10000 - /* enable caching for 64K using fixed mtrr */ +/* 0x06 is the WB IO type for a given 4k segment. + * segs is the number of 4k segments we want to use for CAR. + * subpart is the nth 32-bit window into IO type configuration. + * reg is the register where the IO type should be stored. + */ +.macro extractmask segs, subpart, reg +.if \segs - (\subpart * 4) <= 0 + xorl \reg, \reg +.elseif \segs - (\subpart * 4) == 1 + movl $0x06000000, \reg +.elseif \segs - (\subpart * 4) == 2 + movl $0x06060000, \reg +.elseif \segs - (\subpart * 4) == 3 + movl $0x06060600, \reg +.elseif \segs - (\subpart * 4) >= 4 + movl $0x06060606, \reg +.endif +.endm +.macro simplemask_helper segs, subpart +.if \subpart & 0x1 + extractmask \segs, \subpart, %eax +.else + extractmask \segs, \subpart, %edx +.endif +.endm +/* size is the cache size in bytes we want to use for CAR. + * part is the nth 64-bit window into IO type configuration. + */ +.macro simplemask size, part + simplemask_helper (\size / 0x1000), (\part * 2 + 1) + simplemask_helper (\size / 0x1000), (\part * 2) +.endm + +#if CacheSize > 0x10000 +#error Invalid CAR size, must be at most 64k. +#endif +#if CacheSize < 0x1000 +#error Invalid CAR size, must be at least 4k. This is a processor limitation. +#endif +#if (CacheSize & (0x1000 - 1)) +#error Invalid CAR size, is not a multiple of 4k. This is a processor limitation. +#endif + +#if CacheSize > 0x8000 + /* enable caching for 32K-64K using fixed mtrr */ movl $0x268, %ecx /* fix4k_c0000*/ - movl $0x06060606, %eax /* WB IO type */ - movl %eax, %edx + simplemask CacheSize, 1 wrmsr - movl $0x269, %ecx - wrmsr #endif
-#if CacheSize == 0x8000 - /* enable caching for 32K using fixed mtrr */ + /* enable caching for 0-32K using fixed mtrr */ movl $0x269, %ecx /* fix4k_c8000*/ - movl $0x06060606, %eax /* WB IO type */ - movl %eax, %edx + simplemask CacheSize, 0 wrmsr -#endif
- /* enable caching for 16K/8K/4K using fixed mtrr */ - movl $0x269, %ecx /* fix4k_cc000*/ -#if CacheSize == 0x4000 - movl $0x06060606, %edx /* WB IO type */ -#endif -#if CacheSize == 0x2000 - movl $0x06060000, %edx /* WB IO type */ -#endif -#if CacheSize == 0x1000 - movl $0x06000000, %edx /* WB IO type */ -#endif - xorl %eax, %eax - wrmsr - - #else /* disable cache */ movl %cr0, %eax
Marc?
While the patch is against the generic x86 CAR code, it can also be easily modified to work with AMD K8/K9/K10 CAR code. Especially the recent K10 commit made AMD CAR an #ifdef mess which could be sorted out nicely.
This patch is part of my quest to clean up those v2 code parts which will someday end up in v3.
Regards, Carl-Daniel
On 30.12.2007 00:30, Carl-Daniel Hailfinger wrote:
This patch is an attempt at introducing 4k CAR size granularity for the generic x86 code. For the old supported CAR sizes, the newly generated code is equivalent, so it should be a no-brainer.
Add a copyright header to the code, the header is derived from the one found in the same piece of code in v3.
Signed-off-by: Carl-Daniel Hailfinger c-d.hailfinger.devel.2006@gmx.net
Index: LinuxBIOSv2-CARx86/src/cpu/x86/car/cache_as_ram.inc
--- LinuxBIOSv2-CARx86/src/cpu/x86/car/cache_as_ram.inc (Revision 3026) +++ LinuxBIOSv2-CARx86/src/cpu/x86/car/cache_as_ram.inc (Arbeitskopie) @@ -1,3 +1,28 @@ +/*
+ * This file is part of the LinuxBIOS project.
+ *
+ * Copyright (C) 2000,2007 Ronald G. Minnich rminnich@gmail.com
+ * Copyright (C) 2005 Eswar Nallusamy, LANL
+ * Copyright (C) 2005 Tyan
+ * (Written by Yinghai Lu yhlu@tyan.com for Tyan)
+ * Copyright (C) 2007 coresystems GmbH
+ * (Written by Stefan Reinauer stepan@coresystems.de for coresystems GmbH)
+ * Copyright (C) 2007 Carl-Daniel Hailfinger
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
/* We will use 4K bytes only */ /* disable HyperThreading is done by eswar*/ /* other's is the same as AMD except remove amd specific msr */ @@ -106,39 +131,61 @@ jmp clear_fixed_var_mtrr clear_fixed_var_mtrr_out:
-#if CacheSize == 0x10000
-	/* enable caching for 64K using fixed mtrr */
+/* 0x06 is the WB IO type for a given 4k segment.
+ * segs is the number of 4k segments we want to use for CAR.
+ * subpart is the nth 32-bit window into IO type configuration.
+ * reg is the register where the IO type should be stored.
+ */
+.macro extractmask segs, subpart, reg
+.if \segs - (\subpart * 4) <= 0
+	xorl \reg, \reg
+.elseif \segs - (\subpart * 4) == 1
+	movl $0x06000000, \reg
+.elseif \segs - (\subpart * 4) == 2
+	movl $0x06060000, \reg
+.elseif \segs - (\subpart * 4) == 3
+	movl $0x06060600, \reg
+.elseif \segs - (\subpart * 4) >= 4
+	movl $0x06060606, \reg
+.endif
+.endm
+.macro simplemask_helper segs, subpart
+.if \subpart & 0x1
+	extractmask \segs, \subpart, %eax
+.else
+	extractmask \segs, \subpart, %edx
+.endif
+.endm
+/* size is the cache size in bytes we want to use for CAR.
+ * part is the nth 64-bit window into IO type configuration.
+ */
+.macro simplemask size, part
+	simplemask_helper (\size / 0x1000), (\part * 2 + 1)
+	simplemask_helper (\size / 0x1000), (\part * 2)
+.endm
+
+#if CacheSize > 0x10000
+#error Invalid CAR size, must be at most 64k.
+#endif
+#if CacheSize < 0x1000
+#error Invalid CAR size, must be at least 4k. This is a processor limitation.
+#endif
+#if (CacheSize & (0x1000 - 1))
+#error Invalid CAR size, is not a multiple of 4k. This is a processor limitation.
+#endif
+
+#if CacheSize > 0x8000
+	/* enable caching for 32K-64K using fixed mtrr */
 	movl $0x268, %ecx /* fix4k_c0000*/
-	movl $0x06060606, %eax /* WB IO type */
-	movl %eax, %edx
+	simplemask CacheSize, 1
 	wrmsr
-	movl $0x269, %ecx
-	wrmsr
 #endif

-#if CacheSize == 0x8000
-	/* enable caching for 32K using fixed mtrr */
+	/* enable caching for 0-32K using fixed mtrr */
 	movl $0x269, %ecx /* fix4k_c8000*/
-	movl $0x06060606, %eax /* WB IO type */
-	movl %eax, %edx
+	simplemask CacheSize, 0
 	wrmsr
-#endif

-	/* enable caching for 16K/8K/4K using fixed mtrr */
-	movl $0x269, %ecx /* fix4k_cc000*/
-#if CacheSize == 0x4000
-	movl $0x06060606, %edx /* WB IO type */
-#endif
-#if CacheSize == 0x2000
-	movl $0x06060000, %edx /* WB IO type */
-#endif
-#if CacheSize == 0x1000
-	movl $0x06000000, %edx /* WB IO type */
-#endif
-	xorl %eax, %eax
-	wrmsr
-
-
 #else
 	/* disable cache */
 	movl %cr0, %eax
Carl-Daniel Hailfinger wrote:
Marc?
While the patch is against the generic x86 CAR code, it can also be easily modified to work with AMD K8/K9/K10 CAR code. Especially the recent K10 commit made AMD CAR an #ifdef mess which could be sorted out nicely.
Yes, it would be good to clean them all up.
This patch is part of my quest to clean up those v2 code parts which will someday end up in v3.
A good cause :)
-#if CacheSize == 0x10000
-	/* enable caching for 64K using fixed mtrr */
+/* 0x06 is the WB IO type for a given 4k segment.
+ * segs is the number of 4k segments we want to use for CAR.
+ * subpart is the nth 32-bit window into IO type configuration.
+ * reg is the register where the IO type should be stored.
+ */
+.macro extractmask segs, subpart, reg
+.if \segs - (\subpart * 4) <= 0
+	xorl \reg, \reg
+.elseif \segs - (\subpart * 4) == 1
+	movl $0x06000000, \reg
+.elseif \segs - (\subpart * 4) == 2
+	movl $0x06060000, \reg
+.elseif \segs - (\subpart * 4) == 3
+	movl $0x06060600, \reg
+.elseif \segs - (\subpart * 4) >= 4
+	movl $0x06060606, \reg
+.endif
+.endm
Why not just pass in the number of 4k pieces, \segs - (\subpart * 4)?
+.macro simplemask_helper segs, subpart
+.if \subpart & 0x1
+	extractmask \segs, \subpart, %eax
+.else
+	extractmask \segs, \subpart, %edx
+.endif
+.endm
+/* size is the cache size in bytes we want to use for CAR.
+ * part is the nth 64-bit window into IO type configuration.
+ */
+.macro simplemask size, part
+	simplemask_helper (\size / 0x1000), (\part * 2 + 1)
+	simplemask_helper (\size / 0x1000), (\part * 2)
+.endm
The \part stuff isn't really intuitive. I think an ".if size > 32K" would be better, and then the caller wouldn't have to know about \part. Building on that, the macro could fill in %ecx as well.
What do you think?
Marc
On 08.01.2008 01:40, Marc Jones wrote:
Carl-Daniel Hailfinger wrote:
Marc?
While the patch is against the generic x86 CAR code, it can also be easily modified to work with AMD K8/K9/K10 CAR code. Especially the recent K10 commit made AMD CAR an #ifdef mess which could be sorted out nicely.
Yes, It would be good to clean them all up.
OK, will prepare a patch to do so after we have finalized the generic x86 CAR patch.
This patch is part of my quest to clean up those v2 code parts which will someday end up in v3.
A good cause :)
Thanks.
-#if CacheSize == 0x10000
-	/* enable caching for 64K using fixed mtrr */
+/* 0x06 is the WB IO type for a given 4k segment.
+ * segs is the number of 4k segments we want to use for CAR.
+ * subpart is the nth 32-bit window into IO type configuration.
+ * reg is the register where the IO type should be stored.
+ */
+.macro extractmask segs, subpart, reg
+.if \segs - (\subpart * 4) <= 0
+	xorl \reg, \reg
+.elseif \segs - (\subpart * 4) == 1
+	movl $0x06000000, \reg
+.elseif \segs - (\subpart * 4) == 2
+	movl $0x06060000, \reg
+.elseif \segs - (\subpart * 4) == 3
+	movl $0x06060600, \reg
+.elseif \segs - (\subpart * 4) >= 4
+	movl $0x06060606, \reg
+.endif
+.endm
Why not just pass in the number of 4k pieces, \segs - (\subpart * 4)?
To make the formula easier to understand for readers of the code. But I agree that moving the calculation out of this macro and into its caller makes the code more readable.
+.macro simplemask_helper segs, subpart
+.if \subpart & 0x1
+	extractmask \segs, \subpart, %eax
+.else
+	extractmask \segs, \subpart, %edx
+.endif
+.endm
+/* size is the cache size in bytes we want to use for CAR.
+ * part is the nth 64-bit window into IO type configuration.
+ */
+.macro simplemask size, part
+	simplemask_helper (\size / 0x1000), (\part * 2 + 1)
+	simplemask_helper (\size / 0x1000), (\part * 2)
+.endm
The \part stuff isn't really intuitive. I think an .if size > 32K would be better and then the caller doesn't have to know \part. Building on that the macro could fill in ecx as well.
I'll rewrite the patch a bit to make it more intuitive. I disagree with the ".if size > 32k" part, though. But once you see my new code, it may be elegant enough to not worry about the 32k boundary anymore.
Regards, Carl-Daniel
Next try, with feedback incorporated. The "part" parameter has been replaced by a byte offset into the CAR area (the start of the 32k window being configured).
This patch is an attempt at introducing 4k CAR size granularity for the generic x86 code. For the old supported CAR sizes, the newly generated code is equivalent, so it should be a no-brainer.
Add a copyright header to the code; the header is derived from the one found in the same piece of code in v3.
Signed-off-by: Carl-Daniel Hailfinger c-d.hailfinger.devel.2006@gmx.net
Index: LinuxBIOSv2-CARx86/src/cpu/x86/car/cache_as_ram.inc =================================================================== --- LinuxBIOSv2-CARx86/src/cpu/x86/car/cache_as_ram.inc (Revision 3036) +++ LinuxBIOSv2-CARx86/src/cpu/x86/car/cache_as_ram.inc (Arbeitskopie) @@ -1,3 +1,28 @@ +/* + * This file is part of the LinuxBIOS project. + * + * Copyright (C) 2000,2007 Ronald G. Minnich rminnich@gmail.com + * Copyright (C) 2005 Eswar Nallusamy, LANL + * Copyright (C) 2005 Tyan + * (Written by Yinghai Lu yhlu@tyan.com for Tyan) + * Copyright (C) 2007 coresystems GmbH + * (Written by Stefan Reinauer stepan@coresystems.de for coresystems GmbH) + * Copyright (C) 2007 Carl-Daniel Hailfinger + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + /* We will use 4K bytes only */ /* disable HyperThreading is done by eswar*/ /* other's is the same as AMD except remove amd specific msr */ @@ -106,39 +131,59 @@ jmp clear_fixed_var_mtrr clear_fixed_var_mtrr_out:
-#if CacheSize == 0x10000
-	/* enable caching for 64K using fixed mtrr */
+/* 0x06 is the WB IO type for a given 4k segment.
+ * segs is the number of 4k segments in the area of the particular
+ * register we want to use for CAR.
+ * reg is the register where the IO type should be stored.
+ */
+.macro extractmask segs, reg
+.if \segs <= 0
+	/* The xorl here is superfluous because at the point of first execution
+	 * of this macro, %eax and %edx are cleared. Later invocations of this
+	 * macro will have a monotonically increasing segs parameter.
+	 */
+	xorl \reg, \reg
+.elseif \segs == 1
+	movl $0x06000000, \reg
+.elseif \segs == 2
+	movl $0x06060000, \reg
+.elseif \segs == 3
+	movl $0x06060600, \reg
+.elseif \segs >= 4
+	movl $0x06060606, \reg
+.endif
+.endm
+
+/* carsize is the cache size in bytes we want to use for CAR.
+ * windowoffset is the 32k-aligned window into CAR size
+ */
+.macro simplemask carsize, windowoffset
+	extractmask (((\carsize - \windowoffset) / 0x1000) - 4), %eax
+	extractmask (((\carsize - \windowoffset) / 0x1000)), %edx
+.endm
+
+#if CacheSize > 0x10000
+#error Invalid CAR size, must be at most 64k.
+#endif
+#if CacheSize < 0x1000
+#error Invalid CAR size, must be at least 4k. This is a processor limitation.
+#endif
+#if (CacheSize & (0x1000 - 1))
+#error Invalid CAR size, is not a multiple of 4k. This is a processor limitation.
+#endif
+
+#if CacheSize > 0x8000
+	/* enable caching for 32K-64K using fixed mtrr */
 	movl $0x268, %ecx /* fix4k_c0000*/
-	movl $0x06060606, %eax /* WB IO type */
-	movl %eax, %edx
+	simplemask CacheSize, 0x8000
 	wrmsr
-	movl $0x269, %ecx
-	wrmsr
 #endif

-#if CacheSize == 0x8000
-	/* enable caching for 32K using fixed mtrr */
+	/* enable caching for 0-32K using fixed mtrr */
 	movl $0x269, %ecx /* fix4k_c8000*/
-	movl $0x06060606, %eax /* WB IO type */
-	movl %eax, %edx
+	simplemask CacheSize, 0
 	wrmsr
-#endif

-	/* enable caching for 16K/8K/4K using fixed mtrr */
-	movl $0x269, %ecx /* fix4k_cc000*/
-#if CacheSize == 0x4000
-	movl $0x06060606, %edx /* WB IO type */
-#endif
-#if CacheSize == 0x2000
-	movl $0x06060000, %edx /* WB IO type */
-#endif
-#if CacheSize == 0x1000
-	movl $0x06000000, %edx /* WB IO type */
-#endif
-	xorl %eax, %eax
-	wrmsr
-
-
 #else
 	/* disable cache */
 	movl %cr0, %eax
Carl-Daniel Hailfinger wrote:
Next try, with feedback incorporated. The "part" parameter has been changed to an offset into CAR size expressed in bytes.
This patch is an attempt at introducing 4k CAR size granularity for the generic x86 code. For the old supported CAR sizes, the newly generated code is equivalent, so it should be a no-brainer.
Add a copyright header to the code, the header is derived from the one found in the same piece of code in v3.
Signed-off-by: Carl-Daniel Hailfinger c-d.hailfinger.devel.2006@gmx.net
This looks good. Thanks for the iterations. Acked-by: Marc Jones marc.jones@amd.com
On 08.01.2008 17:56, Marc Jones wrote:
Carl-Daniel Hailfinger wrote:
Next try, with feedback incorporated. The "part" parameter has been changed to an offset into CAR size expressed in bytes.
This patch is an attempt at introducing 4k CAR size granularity for the generic x86 code. For the old supported CAR sizes, the newly generated code is equivalent, so it should be a no-brainer.
Add a copyright header to the code, the header is derived from the one found in the same piece of code in v3.
Signed-off-by: Carl-Daniel Hailfinger c-d.hailfinger.devel.2006@gmx.net
This looks good. Thanks for the iterations. Acked-by: Marc Jones marc.jones@amd.com
Thanks, r3038.
Will prepare similar patches for v2 AMD CAR and v3 x86 CAR.
Regards, Carl-Daniel