[PATCH 00/11] FP/MSA fixes

Discussion:

Paul Burton

2014-09-24 09:45:31 UTC

This series fixes a bunch of bugs, both build & runtime, with FP & MSA
support. Most of them only affect systems with the new FP modes & MSA
support enabled but patch 6 in particular is more general, fixing
problems for mips64 systems.

James Hogan (2):
Revert "MIPS: Don't assume 64-bit FP registers for context switch"
MIPS: MSA: Fix big-endian FPR_IDX implementation

Paul Burton (9):
MIPS: push .set arch=r4000 into the functions needing it
MIPS: assume at as source/dest of MSA copy/insert instructions
MIPS: remove MSA macro recursion
MIPS: wrap cfcmsa & ctcmsa accesses for toolchains with MSA support
MIPS: clear MSACSR cause bits when handling MSA FP exception
MIPS: fix mfc1 & mfhc1 emulation for mips64 systems
MIPS: ensure FCSR cause bits are clear after invoking FPU emulator
MIPS: prevent FP context set via ptrace being discarded
MIPS: disable FPU if the mode is unsupported

arch/mips/include/asm/asmmacro-32.h | 128 ++++++++++-----------
arch/mips/include/asm/asmmacro.h | 218 +++++++++++++++++++++---------------
arch/mips/include/asm/fpu.h | 19 ++--
arch/mips/include/asm/processor.h | 2 +-
arch/mips/kernel/asm-offsets.c | 66 -----------
arch/mips/kernel/genex.S | 11 +-
arch/mips/kernel/ptrace.c | 30 ++++-
arch/mips/kernel/r4k_fpu.S | 13 ++-
arch/mips/kernel/traps.c | 17 +--
arch/mips/math-emu/cp1emu.c | 6 +-
10 files changed, 262 insertions(+), 248 deletions(-)

--
2.0.4

Paul Burton

2014-09-24 09:45:32 UTC

Permalink

The {save,restore}_fp_context{,32} functions require that the assembler
allows the use of sdc instructions on any FP register, and this is
acomplished by setting the arch to r4000. However this has the effect
of enabling the assembler to use mips64 instructions in the expansion
of pseudo-instructions. This was done in the (now-reverted) commit
eec43a224cf1 "MIPS: Save/restore MSA context around signals" which
led to my mistakenly believing that there was an assembler bug, when
in reality the assembler was just emitting mips64 instructions. Avoid
the issue for future commits which will add code to r4k_fpu.S by
pushing the .set arch=r4000 directives into the functions that require
it, and remove the spurious assertion declaring the assembler bug.

Signed-off-by: Paul Burton <***@imgtec.com>
---
arch/mips/include/asm/asmmacro.h | 12 ++++--------
arch/mips/kernel/r4k_fpu.S | 13 ++++++++++++-
2 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/arch/mips/include/asm/asmmacro.h b/arch/mips/include/asm/asmmacro.h
index cd9a98b..f6293ef 100644
--- a/arch/mips/include/asm/asmmacro.h
+++ b/arch/mips/include/asm/asmmacro.h
@@ -312,8 +312,7 @@
.set noat
.insn
.word COPY_UW_MSA_INSN | (\n << 16) | (\ws << 11)
- /* move triggers an assembler bug... */
- or \rd, $1, zero
+ move \rd, $1
.set pop
.endm

@@ -322,16 +321,14 @@
.set noat
.insn
.word COPY_UD_MSA_INSN | (\n << 16) | (\ws << 11)
- /* move triggers an assembler bug... */
- or \rd, $1, zero
+ move \rd, $1
.set pop
.endm

.macro insert_w wd, n, rs
.set push
.set noat
- /* move triggers an assembler bug... */
- or $1, \rs, zero
+ move $1, \rs
.word INSERT_W_MSA_INSN | (\n << 16) | (\wd << 6)
.set pop
.endm
@@ -339,8 +336,7 @@
.macro insert_d wd, n, rs
.set push
.set noat
- /* move triggers an assembler bug... */
- or $1, \rs, zero
+ move $1, \rs
.word INSERT_D_MSA_INSN | (\n << 16) | (\wd << 6)
.set pop
.endm
diff --git a/arch/mips/kernel/r4k_fpu.S b/arch/mips/kernel/r4k_fpu.S
index 8352523..787d3db 100644
--- a/arch/mips/kernel/r4k_fpu.S
+++ b/arch/mips/kernel/r4k_fpu.S
@@ -30,9 +30,10 @@
.endm

.set noreorder
- .set arch=r4000

LEAF(_save_fp_context)
+ .set push
+ .set arch=r4000
cfc1 t1, fcr31

#if defined(CONFIG_64BIT) || defined(CONFIG_CPU_MIPS32_R2)
@@ -84,11 +85,14 @@ LEAF(_save_fp_context)
EX sw t1, SC_FPC_CSR(a0)
jr ra
li v0, 0 # success
+ .set pop
END(_save_fp_context)

#ifdef CONFIG_MIPS32_COMPAT
/* Save 32-bit process floating point context */
LEAF(_save_fp_context32)
+ .set push
+ .set arch=r4000
cfc1 t1, fcr31

mfc0 t0, CP0_STATUS
@@ -137,6 +141,7 @@ LEAF(_save_fp_context32)

jr ra
li v0, 0 # success
+ .set pop
END(_save_fp_context32)
#endif

@@ -146,6 +151,8 @@ LEAF(_save_fp_context32)
* - cp1 status/control register
*/
LEAF(_restore_fp_context)
+ .set push
+ .set arch=r4000
EX lw t1, SC_FPC_CSR(a0)

#if defined(CONFIG_64BIT) || defined(CONFIG_CPU_MIPS32_R2)
@@ -194,10 +201,13 @@ LEAF(_restore_fp_context)
ctc1 t1, fcr31
jr ra
li v0, 0 # success
+ .set pop
END(_restore_fp_context)

#ifdef CONFIG_MIPS32_COMPAT
LEAF(_restore_fp_context32)
+ .set push
+ .set arch=r4000
/* Restore an o32 sigcontext. */
EX lw t1, SC32_FPC_CSR(a0)

@@ -242,6 +252,7 @@ LEAF(_restore_fp_context32)
ctc1 t1, fcr31
jr ra
li v0, 0 # success
+ .set pop
END(_restore_fp_context32)
#endif

--
2.0.4

Paul Burton

2014-09-24 09:45:33 UTC

Permalink

Assuming at ($1) as the source or destination register of copy or
insert instructions:

- Simplifies the macros providing those instructions for toolchains
without MSA support.

- Avoids an unnecessary move instruction when at is used as the source
or destination register anyway.

- Is sufficient for the uses to be introduced in the kernel by a
subsequent patch.

Note that due to a patch ordering snafu on my part this also fixes the
currently broken build with MSA support enabled. The build has been
broken since commit c9017757c532 "MIPS: init upper 64b of vector
registers when MSA is first used", which this patch should have
preceeded.

Signed-off-by: Paul Burton <***@imgtec.com>
---
arch/mips/include/asm/asmmacro.h | 28 ++++++++++++----------------
1 file changed, 12 insertions(+), 16 deletions(-)

diff --git a/arch/mips/include/asm/asmmacro.h b/arch/mips/include/asm/asmmacro.h
index f6293ef..7e4aaeb 100644
--- a/arch/mips/include/asm/asmmacro.h
+++ b/arch/mips/include/asm/asmmacro.h
@@ -218,35 +218,35 @@
.set pop
.endm

- .macro copy_u_w rd, ws, n
+ .macro copy_u_w ws, n
.set push
.set mips32r2
.set msa
- copy_u.w \rd, $w\ws[\n]
+ copy_u.w $1, $w\ws[\n]
.set pop
.endm

- .macro copy_u_d rd, ws, n
+ .macro copy_u_d ws, n
.set push
.set mips64r2
.set msa
- copy_u.d \rd, $w\ws[\n]
+ copy_u.d $1, $w\ws[\n]
.set pop
.endm

- .macro insert_w wd, n, rs
+ .macro insert_w wd, n
.set push
.set mips32r2
.set msa
- insert.w $w\wd[\n], \rs
+ insert.w $w\wd[\n], $1
.set pop
.endm

- .macro insert_d wd, n, rs
+ .macro insert_d wd, n
.set push
.set mips64r2
.set msa
- insert.d $w\wd[\n], \rs
+ insert.d $w\wd[\n], $1
.set pop
.endm
#else
@@ -307,36 +307,32 @@
.set pop
.endm

- .macro copy_u_w rd, ws, n
+ .macro copy_u_w ws, n
.set push
.set noat
.insn
.word COPY_UW_MSA_INSN | (\n << 16) | (\ws << 11)
- move \rd, $1
.set pop
.endm

- .macro copy_u_d rd, ws, n
+ .macro copy_u_d ws, n
.set push
.set noat
.insn
.word COPY_UD_MSA_INSN | (\n << 16) | (\ws << 11)
- move \rd, $1
.set pop
.endm

- .macro insert_w wd, n, rs
+ .macro insert_w wd, n
.set push
.set noat
- move $1, \rs
.word INSERT_W_MSA_INSN | (\n << 16) | (\wd << 6)
.set pop
.endm

- .macro insert_d wd, n, rs
+ .macro insert_d wd, n
.set push
.set noat
- move $1, \rs
.word INSERT_D_MSA_INSN | (\n << 16) | (\wd << 6)
.set pop
.endm

--
2.0.4

Paul Burton

2014-09-24 09:45:34 UTC

Permalink

Recursive macros made the code more concise & worked great for the
case where the toolchain doesn't support MSA. However, with toolchains
which do support MSA they lead to build failures such as:

arch/mips/kernel/r4k_switch.S: Assembler messages:
arch/mips/kernel/r4k_switch.S:148: Error: invalid operands `insert.w $w(0+1)[2],$1'
arch/mips/kernel/r4k_switch.S:148: Error: invalid operands `insert.w $w(0+1)[3],$1'
arch/mips/kernel/r4k_switch.S:148: Error: invalid operands `insert.w $w((0+1)+1)[2],$1'
arch/mips/kernel/r4k_switch.S:148: Error: invalid operands `insert.w $w((0+1)+1)[3],$1'
...

Drop the recursion from msa_init_all_upper invoking the msa_init_upper
macro explicitly for each vector register.

Signed-off-by: Paul Burton <***@imgtec.com>
---
arch/mips/include/asm/asmmacro.h | 34 +++++++++++++++++++++++++++++++---
1 file changed, 31 insertions(+), 3 deletions(-)

diff --git a/arch/mips/include/asm/asmmacro.h b/arch/mips/include/asm/asmmacro.h
index 7e4aaeb..62c4af9 100644
--- a/arch/mips/include/asm/asmmacro.h
+++ b/arch/mips/include/asm/asmmacro.h
@@ -425,9 +425,6 @@
insert_w \wd, 2
insert_w \wd, 3
#endif
- .if 31-\wd
- msa_init_upper (\wd+1)
- .endif
.endm

.macro msa_init_all_upper
@@ -435,6 +432,37 @@
.set noat
not $1, zero
msa_init_upper 0
+ msa_init_upper 1
+ msa_init_upper 2
+ msa_init_upper 3
+ msa_init_upper 4
+ msa_init_upper 5
+ msa_init_upper 6
+ msa_init_upper 7
+ msa_init_upper 8
+ msa_init_upper 9
+ msa_init_upper 10
+ msa_init_upper 11
+ msa_init_upper 12
+ msa_init_upper 13
+ msa_init_upper 14
+ msa_init_upper 15
+ msa_init_upper 16
+ msa_init_upper 17
+ msa_init_upper 18
+ msa_init_upper 19
+ msa_init_upper 20
+ msa_init_upper 21
+ msa_init_upper 22
+ msa_init_upper 23
+ msa_init_upper 24
+ msa_init_upper 25
+ msa_init_upper 26
+ msa_init_upper 27
+ msa_init_upper 28
+ msa_init_upper 29
+ msa_init_upper 30
+ msa_init_upper 31
.set pop
.endm

--
2.0.4

Paul Burton

2014-09-24 09:45:35 UTC

Permalink

Uses of the cfcmsa & ctcmsa instructions were not being wrapped by a
macro in the case where the toolchain supports MSA, since the arguments
exactly match a typical use of the instructions. However using current
toolchains this leads to errors such as:

arch/mips/kernel/genex.S:437: Error: opcode not supported on this processor: mips32r2 (mips32r2) `cfcmsa $5,1'

Thus uses of the instructions must be in the context of a ".set msa"
directive, however doing that from the users of the instructions would
be messy due to the possibility that the toolchain does not support
MSA. Fix this by renaming the macros (prepending an underscore) in order
to avoid recursion when attempting to emit the instructions, and provide
implementations for the TOOLCHAIN_SUPPORTS_MSA case which ".set msa" as
appropriate.

Signed-off-by: Paul Burton <***@imgtec.com>
---
arch/mips/include/asm/asmmacro.h | 24 ++++++++++++++++++++----
1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/arch/mips/include/asm/asmmacro.h b/arch/mips/include/asm/asmmacro.h
index 62c4af9..0bbb3aa 100644
--- a/arch/mips/include/asm/asmmacro.h
+++ b/arch/mips/include/asm/asmmacro.h
@@ -202,6 +202,22 @@
.endm

#ifdef TOOLCHAIN_SUPPORTS_MSA
+ .macro _cfcmsa rd, cs
+ .set push
+ .set mips32r2
+ .set msa
+ cfcmsa \rd, $\cs
+ .set pop
+ .endm
+
+ .macro _ctcmsa cd, rs
+ .set push
+ .set mips32r2
+ .set msa
+ ctcmsa $\cd, \rs
+ .set pop
+ .endm
+
.macro ld_d wd, off, base
.set push
.set mips32r2
@@ -274,7 +290,7 @@
/*
* Temporary until all toolchains in use include MSA support.
*/
- .macro cfcmsa rd, cs
+ .macro _cfcmsa rd, cs
.set push
.set noat
.insn
@@ -283,7 +299,7 @@
.set pop
.endm

- .macro ctcmsa cd, rs
+ .macro _ctcmsa cd, rs
.set push
.set noat
move $1, \rs
@@ -373,7 +389,7 @@
st_d 31, THREAD_FPR31, \thread
.set push
.set noat
- cfcmsa $1, MSA_CSR
+ _cfcmsa $1, MSA_CSR
sw $1, THREAD_MSA_CSR(\thread)
.set pop
.endm
@@ -382,7 +398,7 @@
.set push
.set noat
lw $1, THREAD_MSA_CSR(\thread)
- ctcmsa MSA_CSR, $1
+ _ctcmsa MSA_CSR, $1
.set pop
ld_d 0, THREAD_FPR0, \thread
ld_d 1, THREAD_FPR1, \thread

--
2.0.4

James Hogan

2014-09-24 12:08:20 UTC

Permalink

Post by Paul Burton
and provide
implementations for the TOOLCHAIN_SUPPORTS_MSA case which ".set msa" as
appropriate.

nit: which *uses* ".set msa"?

Cheers
James

Paul Burton

2014-09-25 09:23:50 UTC

Permalink

Post by James Hogan

Post by Paul Burton
and provide
implementations for the TOOLCHAIN_SUPPORTS_MSA case which ".set msa" as
appropriate.

nit: which *uses* ".set msa"?
Cheers
James

In my head I read that without the ".", so "which set msa" seems quite
natural to me. But if you/others are bothered enough let me know, I
don't mind adding "uses".

Paul

James Hogan

2014-09-25 10:24:45 UTC

Permalink

Post by Paul Burton

Post by James Hogan

Post by Paul Burton
and provide
implementations for the TOOLCHAIN_SUPPORTS_MSA case which ".set msa" as
appropriate.

nit: which *uses* ".set msa"?
Cheers
James

In my head I read that without the ".", so "which set msa" seems quite
natural to me. But if you/others are bothered enough let me know, I
don't mind adding "uses".

Meh, fair enough, I'm reading it like that too now :)

Cheers
James

Paul Burton

2014-09-24 09:45:36 UTC

Permalink

Much like for traditional scalar FP exceptions, the cause bits in the
MSACSR register need to be cleared following an MSA FP exception.
Without doing so the exception will simply be raised again whenever
the kernel restores MSACSR from a tasks saved context, leading to
undesirable spurious exceptions. Clear the cause bits from the
handle_msa_fpe function, mirroring the way handle_fpe clears the
cause bits in FCSR.

Signed-off-by: Paul Burton <***@imgtec.com>
---
arch/mips/kernel/genex.S | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S
index ac35e12..ae84496 100644
--- a/arch/mips/kernel/genex.S
+++ b/arch/mips/kernel/genex.S
@@ -367,6 +367,15 @@ NESTED(nmi_handler, PT_SIZE, sp)
STI
.endm

+ .macro __build_clear_msa_fpe
+ _cfcmsa a1, MSA_CSR
+ li a2, ~(0x3f << 12)
+ and a2, a1
+ _ctcmsa MSA_CSR, a1
+ TRACE_IRQS_ON
+ STI
+ .endm
+
.macro __build_clear_ade
MFC0 t0, CP0_BADVADDR
PTR_S t0, PT_BVADDR(sp)
@@ -425,7 +434,7 @@ NESTED(nmi_handler, PT_SIZE, sp)
BUILD_HANDLER cpu cpu sti silent /* #11 */
BUILD_HANDLER ov ov sti silent /* #12 */
BUILD_HANDLER tr tr sti silent /* #13 */
- BUILD_HANDLER msa_fpe msa_fpe sti silent /* #14 */
+ BUILD_HANDLER msa_fpe msa_fpe msa_fpe silent /* #14 */
BUILD_HANDLER fpe fpe fpe silent /* #15 */
BUILD_HANDLER ftlb ftlb none silent /* #16 */
BUILD_HANDLER msa msa sti silent /* #21 */

--
2.0.4

Sergei Shtylyov

2014-09-24 11:10:14 UTC

Permalink

Hello.

Post by Paul Burton
Much like for traditional scalar FP exceptions, the cause bits in the
MSACSR register need to be cleared following an MSA FP exception.
Without doing so the exception will simply be raised again whenever
the kernel restores MSACSR from a tasks saved context, leading to
undesirable spurious exceptions. Clear the cause bits from the
handle_msa_fpe function, mirroring the way handle_fpe clears the
cause bits in FCSR.
---
arch/mips/kernel/genex.S | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S
index ac35e12..ae84496 100644
--- a/arch/mips/kernel/genex.S
+++ b/arch/mips/kernel/genex.S
@@ -367,6 +367,15 @@ NESTED(nmi_handler, PT_SIZE, sp)
STI
.endm
+ .macro __build_clear_msa_fpe
+ _cfcmsa a1, MSA_CSR
+ li a2, ~(0x3f << 12)
+ and a2, a1
+ _ctcmsa MSA_CSR, a1

Not a2?

Post by Paul Burton
+ TRACE_IRQS_ON
+ STI
+ .endm
+
.macro __build_clear_ade
MFC0 t0, CP0_BADVADDR
PTR_S t0, PT_BVADDR(sp)

WBR, Sergei

Paul Burton

2014-09-25 09:20:59 UTC

Permalink

Much like for traditional scalar FP exceptions, the cause bits in the
MSACSR register need to be cleared following an MSA FP exception.
Without doing so the exception will simply be raised again whenever
the kernel restores MSACSR from a tasks saved context, leading to
undesirable spurious exceptions. Clear the cause bits from the
handle_msa_fpe function, mirroring the way handle_fpe clears the
cause bits in FCSR.

Signed-off-by: Paul Burton <***@imgtec.com>
---
Changes in v2:
- Correct the and instruction (thanks Sergei!).
---
arch/mips/kernel/genex.S | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S
index ac35e12..5a6ddc6 100644
--- a/arch/mips/kernel/genex.S
+++ b/arch/mips/kernel/genex.S
@@ -367,6 +367,15 @@ NESTED(nmi_handler, PT_SIZE, sp)
STI
.endm

+ .macro __build_clear_msa_fpe
+ _cfcmsa a1, MSA_CSR
+ li a2, ~(0x3f << 12)
+ and a1, a1, a2
+ _ctcmsa MSA_CSR, a1
+ TRACE_IRQS_ON
+ STI
+ .endm
+
.macro __build_clear_ade
MFC0 t0, CP0_BADVADDR
PTR_S t0, PT_BVADDR(sp)
@@ -425,7 +434,7 @@ NESTED(nmi_handler, PT_SIZE, sp)
BUILD_HANDLER cpu cpu sti silent /* #11 */
BUILD_HANDLER ov ov sti silent /* #12 */
BUILD_HANDLER tr tr sti silent /* #13 */
- BUILD_HANDLER msa_fpe msa_fpe sti silent /* #14 */
+ BUILD_HANDLER msa_fpe msa_fpe msa_fpe silent /* #14 */
BUILD_HANDLER fpe fpe fpe silent /* #15 */
BUILD_HANDLER ftlb ftlb none silent /* #16 */
BUILD_HANDLER msa msa sti silent /* #21 */

--
2.0.4

Paul Burton

2014-09-24 09:45:37 UTC

Permalink

Commit bbd426f542cb "MIPS: Simplify FP context access" modified the
SIFROMREG & SIFROMHREG macros such that they return unsigned rather
than signed 32b integers. I had believed that to be fine, but
inadvertently missed the mfc1 & mfhc1 cases which write to a struct
pt_regs regs element. On mips32 this is fine, but on mips64 those
saved regs fields are 64b wide. Using unsigned values caused the
32b value from the FP register to be zero rather than sign extended
as the architecture specifies, causing incorrect emulation of the
mfc1 & mfhc1 instructions. Fix by reintroducing the casts to signed
integers, and therefore the sign extension.

Signed-off-by: Paul Burton <***@imgtec.com>
Cc: ***@vger.kernel.org # v3.15+
---
arch/mips/math-emu/cp1emu.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c
index ef8447f..298c1ae 100644
--- a/arch/mips/math-emu/cp1emu.c
+++ b/arch/mips/math-emu/cp1emu.c
@@ -655,9 +655,9 @@ static inline bool hybrid_fprs(void)
#define SIFROMREG(si, x) \
do { \
if (cop1_64bit(xcp) && !hybrid_fprs()) \
- (si) = get_fpr32(&ctx->fpr[x], 0); \
+ (si) = (int)get_fpr32(&ctx->fpr[x], 0); \
else \
- (si) = get_fpr32(&ctx->fpr[(x) & ~1], (x) & 1); \
+ (si) = (int)get_fpr32(&ctx->fpr[(x) & ~1], (x) & 1); \
} while (0)

#define SITOREG(si, x) \
@@ -672,7 +672,7 @@ do { \
} \
} while (0)

-#define SIFROMHREG(si, x) ((si) = get_fpr32(&ctx->fpr[x], 1))
+#define SIFROMHREG(si, x) ((si) = (int)get_fpr32(&ctx->fpr[x], 1))

#define SITOHREG(si, x) \
do { \

--
2.0.4

Paul Burton

2014-09-24 09:45:38 UTC

Permalink

When running the emulator to handle an instruction that raised an FP
unimplemented operation exception, the FCSR cause bits were being
cleared. This is done to ensure that the kernel does not take an FP
exception when later restoring FP context to registers. However, this
was not being done when the emulator is invoked in response to a
coprocessor unusable exception. This happens in 2 cases:

- There is no FPU present in the system. In this case things were
OK, since the FP context is never restored to hardware registers
and thus no FP exception may be raised when restoring FCSR.

- The FPU could not be configured to the mode required by the task.
In this case it would be possible for the emulator to set cause
bits which are later restored to hardware if the task migrates
to a CPU whose associated FPU does support its mode requirements,
or if the tasks FP mode requirements change.

Consistently clear the cause bits after invoking the emulator, by moving
the clearing to process_fpemu_return and ensuring this is always called
before the tasks FP context is restored. This will make it easier to
catch further paths invoking the emulator in future, as will be
introduced in further patches.

Signed-off-by: Paul Burton <***@imgtec.com>
---
arch/mips/kernel/traps.c | 17 +++++++++--------
1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 165c275..3ea7f7a 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -700,6 +700,13 @@ asmlinkage void do_ov(struct pt_regs *regs)

int process_fpemu_return(int sig, void __user *fault_addr)
{
+ /*
+ * We can't allow the emulated instruction to leave any of the cause
+ * bits set in FCSR. If they were then the kernel would take an FP
+ * exception when restoring FP context.
+ */
+ current->thread.fpu.fcr31 &= ~FPU_CSR_ALL_X;
+
if (sig == SIGSEGV || sig == SIGBUS) {
struct siginfo si = {0};
si.si_addr = fault_addr;
@@ -803,18 +810,12 @@ asmlinkage void do_fpe(struct pt_regs *regs, unsigned long fcr31)
sig = fpu_emulator_cop1Handler(regs, &current->thread.fpu, 1,
&fault_addr);

- /*
- * We can't allow the emulated instruction to leave any of
- * the cause bit set in $fcr31.
- */
- current->thread.fpu.fcr31 &= ~FPU_CSR_ALL_X;
+ /* If something went wrong, signal */
+ process_fpemu_return(sig, fault_addr);

/* Restore the hardware register state */
own_fpu(1); /* Using the FPU again. */

- /* If something went wrong, signal */
- process_fpemu_return(sig, fault_addr);
-
goto out;
} else if (fcr31 & FPU_CSR_INV_X)
info.si_code = FPE_FLTINV;

--
2.0.4

Paul Burton

2014-09-24 09:45:39 UTC

Permalink

If a ptracee has not used the FPU and the ptracer sets its FP context
using PTRACE_POKEUSR, PTRACE_SETFPREGS or PTRACE_SETREGSET then that
context will be discarded upon either the ptracee using the FPU or a
further write to the context via ptrace. Prevent this loss by recording
that the task has "used" math once its FP context has been written to.
The context initialisation code that was present for the PTRACE_POKEUSR
case is reused for the other 2 cases to provide consistent behaviour
for the different ptrace requests.

Signed-off-by: Paul Burton <***@imgtec.com>
---
arch/mips/kernel/ptrace.c | 30 ++++++++++++++++++++++++------
1 file changed, 24 insertions(+), 6 deletions(-)

diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index 645b3c4..4b5543b 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -46,6 +46,26 @@
#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>

+static void init_fp_ctx(struct task_struct *target)
+{
+ /* If FP has been used then the target already has context */
+ if (tsk_used_math(target))
+ return;
+
+ /* Begin with data registers set to all 1s... */
+ memset(&target->thread.fpu.fpr, ~0, sizeof(target->thread.fpu.fpr));
+
+ /* ...and FCSR zeroed */
+ target->thread.fpu.fcr31 = 0;
+
+ /*
+ * Record that the target has "used" math, such that the context
+ * just initialised, and any modifications made by the caller,
+ * aren't discarded.
+ */
+ set_stopped_child_used_math(target);
+}
+
/*
* Called by kernel/ptrace.c when detaching..
*
@@ -142,6 +162,7 @@ int ptrace_setfpregs(struct task_struct *child, __u32 __user *data)
if (!access_ok(VERIFY_READ, data, 33 * 8))
return -EIO;

+ init_fp_ctx(child);
fregs = get_fpu_regs(child);

for (i = 0; i < 32; i++) {
@@ -439,6 +460,8 @@ static int fpr_set(struct task_struct *target,

/* XXX fcr31 */

+ init_fp_ctx(target);
+
if (sizeof(target->thread.fpu.fpr[i]) == sizeof(elf_fpreg_t))
return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
&target->thread.fpu,
@@ -660,12 +683,7 @@ long arch_ptrace(struct task_struct *child, long request,
case FPR_BASE ... FPR_BASE + 31: {
union fpureg *fregs = get_fpu_regs(child);

- if (!tsk_used_math(child)) {
- /* FP not yet used */
- memset(&child->thread.fpu, ~0,
- sizeof(child->thread.fpu));
- child->thread.fpu.fcr31 = 0;
- }
+ init_fp_ctx(child);
#ifdef CONFIG_32BIT
if (test_thread_flag(TIF_32BIT_FPREGS)) {
/*

--
2.0.4

Paul Burton

2014-09-24 09:45:40 UTC

Permalink

The expected semantics of __enable_fpu are for the FPU to be enabled
in the given mode if possible, otherwise for the FPU to be left
disabled and SIGFPE returned. The FPU was incorrectly being left
enabled in cases where the desired value for FR was unavailable.
Without ensuring the FPU is disabled in this case, it would be
possible for userland to go on to execute further FP instructions
natively in the incorrect mode, rather than those instructions being
trapped & emulated as they need to be.

Signed-off-by: Paul Burton <***@imgtec.com>
---
arch/mips/include/asm/fpu.h | 19 ++++++++++++-------
1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/arch/mips/include/asm/fpu.h b/arch/mips/include/asm/fpu.h
index 6e60431..ba62fa5 100644
--- a/arch/mips/include/asm/fpu.h
+++ b/arch/mips/include/asm/fpu.h
@@ -48,6 +48,12 @@ enum fpu_mode {
#define FPU_FR_MASK 0x1
};

+#define __disable_fpu() \
+do { \
+ clear_c0_status(ST0_CU1); \
+ disable_fpu_hazard(); \
+} while (0)
+
static inline int __enable_fpu(enum fpu_mode mode)
{
int fr;
@@ -83,7 +89,12 @@ fr_common:
enable_fpu_hazard();

/* check FR has the desired value */
- return (!!(read_c0_status() & ST0_FR) == !!fr) ? 0 : SIGFPE;
+ if (!!(read_c0_status() & ST0_FR) == !!fr)
+ return 0;
+
+ /* unsupported FR value */
+ __disable_fpu();
+ return SIGFPE;

default:
BUG();
@@ -92,12 +103,6 @@ fr_common:
return SIGFPE;
}

-#define __disable_fpu() \
-do { \
- clear_c0_status(ST0_CU1); \
- disable_fpu_hazard(); \
-} while (0)
-
#define clear_fpu_owner() clear_thread_flag(TIF_USEDFPU)

static inline int __is_fpu_owner(void)

--
2.0.4

Paul Burton

2014-09-24 09:45:41 UTC

Permalink

From: James Hogan <***@imgtec.com>

This reverts commit 02987633df7ba2f62967791dda816eb191d1add3.

The basic premise of the patch was incorrect since MSA context
(including FP state) is saved using st.d which stores two consecutive
64-bit words in memory rather than a single 128-bit word. This means
that even with big endian MSA, the FP state is still in the first 64-bit
word.

Signed-off-by: James Hogan <***@imgtec.com>
Signed-off-by: Paul Burton <***@imgtec.com>
---
arch/mips/include/asm/asmmacro-32.h | 128 ++++++++++++++++++------------------
arch/mips/include/asm/asmmacro.h | 128 ++++++++++++++++++------------------
arch/mips/kernel/asm-offsets.c | 66 -------------------
3 files changed, 128 insertions(+), 194 deletions(-)

diff --git a/arch/mips/include/asm/asmmacro-32.h b/arch/mips/include/asm/asmmacro-32.h
index e38c281..70e1f17 100644
--- a/arch/mips/include/asm/asmmacro-32.h
+++ b/arch/mips/include/asm/asmmacro-32.h
@@ -14,75 +14,75 @@

.macro fpu_save_single thread tmp=t0
cfc1 \tmp, fcr31
- swc1 $f0, THREAD_FPR0_LS64(\thread)
- swc1 $f1, THREAD_FPR1_LS64(\thread)
- swc1 $f2, THREAD_FPR2_LS64(\thread)
- swc1 $f3, THREAD_FPR3_LS64(\thread)
- swc1 $f4, THREAD_FPR4_LS64(\thread)
- swc1 $f5, THREAD_FPR5_LS64(\thread)
- swc1 $f6, THREAD_FPR6_LS64(\thread)
- swc1 $f7, THREAD_FPR7_LS64(\thread)
- swc1 $f8, THREAD_FPR8_LS64(\thread)
- swc1 $f9, THREAD_FPR9_LS64(\thread)
- swc1 $f10, THREAD_FPR10_LS64(\thread)
- swc1 $f11, THREAD_FPR11_LS64(\thread)
- swc1 $f12, THREAD_FPR12_LS64(\thread)
- swc1 $f13, THREAD_FPR13_LS64(\thread)
- swc1 $f14, THREAD_FPR14_LS64(\thread)
- swc1 $f15, THREAD_FPR15_LS64(\thread)
- swc1 $f16, THREAD_FPR16_LS64(\thread)
- swc1 $f17, THREAD_FPR17_LS64(\thread)
- swc1 $f18, THREAD_FPR18_LS64(\thread)
- swc1 $f19, THREAD_FPR19_LS64(\thread)
- swc1 $f20, THREAD_FPR20_LS64(\thread)
- swc1 $f21, THREAD_FPR21_LS64(\thread)
- swc1 $f22, THREAD_FPR22_LS64(\thread)
- swc1 $f23, THREAD_FPR23_LS64(\thread)
- swc1 $f24, THREAD_FPR24_LS64(\thread)
- swc1 $f25, THREAD_FPR25_LS64(\thread)
- swc1 $f26, THREAD_FPR26_LS64(\thread)
- swc1 $f27, THREAD_FPR27_LS64(\thread)
- swc1 $f28, THREAD_FPR28_LS64(\thread)
- swc1 $f29, THREAD_FPR29_LS64(\thread)
- swc1 $f30, THREAD_FPR30_LS64(\thread)
- swc1 $f31, THREAD_FPR31_LS64(\thread)
+ swc1 $f0, THREAD_FPR0(\thread)
+ swc1 $f1, THREAD_FPR1(\thread)
+ swc1 $f2, THREAD_FPR2(\thread)
+ swc1 $f3, THREAD_FPR3(\thread)
+ swc1 $f4, THREAD_FPR4(\thread)
+ swc1 $f5, THREAD_FPR5(\thread)
+ swc1 $f6, THREAD_FPR6(\thread)
+ swc1 $f7, THREAD_FPR7(\thread)
+ swc1 $f8, THREAD_FPR8(\thread)
+ swc1 $f9, THREAD_FPR9(\thread)
+ swc1 $f10, THREAD_FPR10(\thread)
+ swc1 $f11, THREAD_FPR11(\thread)
+ swc1 $f12, THREAD_FPR12(\thread)
+ swc1 $f13, THREAD_FPR13(\thread)
+ swc1 $f14, THREAD_FPR14(\thread)
+ swc1 $f15, THREAD_FPR15(\thread)
+ swc1 $f16, THREAD_FPR16(\thread)
+ swc1 $f17, THREAD_FPR17(\thread)
+ swc1 $f18, THREAD_FPR18(\thread)
+ swc1 $f19, THREAD_FPR19(\thread)
+ swc1 $f20, THREAD_FPR20(\thread)
+ swc1 $f21, THREAD_FPR21(\thread)
+ swc1 $f22, THREAD_FPR22(\thread)
+ swc1 $f23, THREAD_FPR23(\thread)
+ swc1 $f24, THREAD_FPR24(\thread)
+ swc1 $f25, THREAD_FPR25(\thread)
+ swc1 $f26, THREAD_FPR26(\thread)
+ swc1 $f27, THREAD_FPR27(\thread)
+ swc1 $f28, THREAD_FPR28(\thread)
+ swc1 $f29, THREAD_FPR29(\thread)
+ swc1 $f30, THREAD_FPR30(\thread)
+ swc1 $f31, THREAD_FPR31(\thread)
sw \tmp, THREAD_FCR31(\thread)
.endm

.macro fpu_restore_single thread tmp=t0
lw \tmp, THREAD_FCR31(\thread)
- lwc1 $f0, THREAD_FPR0_LS64(\thread)
- lwc1 $f1, THREAD_FPR1_LS64(\thread)
- lwc1 $f2, THREAD_FPR2_LS64(\thread)
- lwc1 $f3, THREAD_FPR3_LS64(\thread)
- lwc1 $f4, THREAD_FPR4_LS64(\thread)
- lwc1 $f5, THREAD_FPR5_LS64(\thread)
- lwc1 $f6, THREAD_FPR6_LS64(\thread)
- lwc1 $f7, THREAD_FPR7_LS64(\thread)
- lwc1 $f8, THREAD_FPR8_LS64(\thread)
- lwc1 $f9, THREAD_FPR9_LS64(\thread)
- lwc1 $f10, THREAD_FPR10_LS64(\thread)
- lwc1 $f11, THREAD_FPR11_LS64(\thread)
- lwc1 $f12, THREAD_FPR12_LS64(\thread)
- lwc1 $f13, THREAD_FPR13_LS64(\thread)
- lwc1 $f14, THREAD_FPR14_LS64(\thread)
- lwc1 $f15, THREAD_FPR15_LS64(\thread)
- lwc1 $f16, THREAD_FPR16_LS64(\thread)
- lwc1 $f17, THREAD_FPR17_LS64(\thread)
- lwc1 $f18, THREAD_FPR18_LS64(\thread)
- lwc1 $f19, THREAD_FPR19_LS64(\thread)
- lwc1 $f20, THREAD_FPR20_LS64(\thread)
- lwc1 $f21, THREAD_FPR21_LS64(\thread)
- lwc1 $f22, THREAD_FPR22_LS64(\thread)
- lwc1 $f23, THREAD_FPR23_LS64(\thread)
- lwc1 $f24, THREAD_FPR24_LS64(\thread)
- lwc1 $f25, THREAD_FPR25_LS64(\thread)
- lwc1 $f26, THREAD_FPR26_LS64(\thread)
- lwc1 $f27, THREAD_FPR27_LS64(\thread)
- lwc1 $f28, THREAD_FPR28_LS64(\thread)
- lwc1 $f29, THREAD_FPR29_LS64(\thread)
- lwc1 $f30, THREAD_FPR30_LS64(\thread)
- lwc1 $f31, THREAD_FPR31_LS64(\thread)
+ lwc1 $f0, THREAD_FPR0(\thread)
+ lwc1 $f1, THREAD_FPR1(\thread)
+ lwc1 $f2, THREAD_FPR2(\thread)
+ lwc1 $f3, THREAD_FPR3(\thread)
+ lwc1 $f4, THREAD_FPR4(\thread)
+ lwc1 $f5, THREAD_FPR5(\thread)
+ lwc1 $f6, THREAD_FPR6(\thread)
+ lwc1 $f7, THREAD_FPR7(\thread)
+ lwc1 $f8, THREAD_FPR8(\thread)
+ lwc1 $f9, THREAD_FPR9(\thread)
+ lwc1 $f10, THREAD_FPR10(\thread)
+ lwc1 $f11, THREAD_FPR11(\thread)
+ lwc1 $f12, THREAD_FPR12(\thread)
+ lwc1 $f13, THREAD_FPR13(\thread)
+ lwc1 $f14, THREAD_FPR14(\thread)
+ lwc1 $f15, THREAD_FPR15(\thread)
+ lwc1 $f16, THREAD_FPR16(\thread)
+ lwc1 $f17, THREAD_FPR17(\thread)
+ lwc1 $f18, THREAD_FPR18(\thread)
+ lwc1 $f19, THREAD_FPR19(\thread)
+ lwc1 $f20, THREAD_FPR20(\thread)
+ lwc1 $f21, THREAD_FPR21(\thread)
+ lwc1 $f22, THREAD_FPR22(\thread)
+ lwc1 $f23, THREAD_FPR23(\thread)
+ lwc1 $f24, THREAD_FPR24(\thread)
+ lwc1 $f25, THREAD_FPR25(\thread)
+ lwc1 $f26, THREAD_FPR26(\thread)
+ lwc1 $f27, THREAD_FPR27(\thread)
+ lwc1 $f28, THREAD_FPR28(\thread)
+ lwc1 $f29, THREAD_FPR29(\thread)
+ lwc1 $f30, THREAD_FPR30(\thread)
+ lwc1 $f31, THREAD_FPR31(\thread)
ctc1 \tmp, fcr31
.endm

diff --git a/arch/mips/include/asm/asmmacro.h b/arch/mips/include/asm/asmmacro.h
index 0bbb3aa..438d6cb 100644
--- a/arch/mips/include/asm/asmmacro.h
+++ b/arch/mips/include/asm/asmmacro.h
@@ -58,44 +58,44 @@

.macro fpu_save_16even thread tmp=t0
cfc1 \tmp, fcr31
- sdc1 $f0, THREAD_FPR0_LS64(\thread)
- sdc1 $f2, THREAD_FPR2_LS64(\thread)
- sdc1 $f4, THREAD_FPR4_LS64(\thread)
- sdc1 $f6, THREAD_FPR6_LS64(\thread)
- sdc1 $f8, THREAD_FPR8_LS64(\thread)
- sdc1 $f10, THREAD_FPR10_LS64(\thread)
- sdc1 $f12, THREAD_FPR12_LS64(\thread)
- sdc1 $f14, THREAD_FPR14_LS64(\thread)
- sdc1 $f16, THREAD_FPR16_LS64(\thread)
- sdc1 $f18, THREAD_FPR18_LS64(\thread)
- sdc1 $f20, THREAD_FPR20_LS64(\thread)
- sdc1 $f22, THREAD_FPR22_LS64(\thread)
- sdc1 $f24, THREAD_FPR24_LS64(\thread)
- sdc1 $f26, THREAD_FPR26_LS64(\thread)
- sdc1 $f28, THREAD_FPR28_LS64(\thread)
- sdc1 $f30, THREAD_FPR30_LS64(\thread)
+ sdc1 $f0, THREAD_FPR0(\thread)
+ sdc1 $f2, THREAD_FPR2(\thread)
+ sdc1 $f4, THREAD_FPR4(\thread)
+ sdc1 $f6, THREAD_FPR6(\thread)
+ sdc1 $f8, THREAD_FPR8(\thread)
+ sdc1 $f10, THREAD_FPR10(\thread)
+ sdc1 $f12, THREAD_FPR12(\thread)
+ sdc1 $f14, THREAD_FPR14(\thread)
+ sdc1 $f16, THREAD_FPR16(\thread)
+ sdc1 $f18, THREAD_FPR18(\thread)
+ sdc1 $f20, THREAD_FPR20(\thread)
+ sdc1 $f22, THREAD_FPR22(\thread)
+ sdc1 $f24, THREAD_FPR24(\thread)
+ sdc1 $f26, THREAD_FPR26(\thread)
+ sdc1 $f28, THREAD_FPR28(\thread)
+ sdc1 $f30, THREAD_FPR30(\thread)
sw \tmp, THREAD_FCR31(\thread)
.endm

.macro fpu_save_16odd thread
.set push
.set mips64r2
- sdc1 $f1, THREAD_FPR1_LS64(\thread)
- sdc1 $f3, THREAD_FPR3_LS64(\thread)
- sdc1 $f5, THREAD_FPR5_LS64(\thread)
- sdc1 $f7, THREAD_FPR7_LS64(\thread)
- sdc1 $f9, THREAD_FPR9_LS64(\thread)
- sdc1 $f11, THREAD_FPR11_LS64(\thread)
- sdc1 $f13, THREAD_FPR13_LS64(\thread)
- sdc1 $f15, THREAD_FPR15_LS64(\thread)
- sdc1 $f17, THREAD_FPR17_LS64(\thread)
- sdc1 $f19, THREAD_FPR19_LS64(\thread)
- sdc1 $f21, THREAD_FPR21_LS64(\thread)
- sdc1 $f23, THREAD_FPR23_LS64(\thread)
- sdc1 $f25, THREAD_FPR25_LS64(\thread)
- sdc1 $f27, THREAD_FPR27_LS64(\thread)
- sdc1 $f29, THREAD_FPR29_LS64(\thread)
- sdc1 $f31, THREAD_FPR31_LS64(\thread)
+ sdc1 $f1, THREAD_FPR1(\thread)
+ sdc1 $f3, THREAD_FPR3(\thread)
+ sdc1 $f5, THREAD_FPR5(\thread)
+ sdc1 $f7, THREAD_FPR7(\thread)
+ sdc1 $f9, THREAD_FPR9(\thread)
+ sdc1 $f11, THREAD_FPR11(\thread)
+ sdc1 $f13, THREAD_FPR13(\thread)
+ sdc1 $f15, THREAD_FPR15(\thread)
+ sdc1 $f17, THREAD_FPR17(\thread)
+ sdc1 $f19, THREAD_FPR19(\thread)
+ sdc1 $f21, THREAD_FPR21(\thread)
+ sdc1 $f23, THREAD_FPR23(\thread)
+ sdc1 $f25, THREAD_FPR25(\thread)
+ sdc1 $f27, THREAD_FPR27(\thread)
+ sdc1 $f29, THREAD_FPR29(\thread)
+ sdc1 $f31, THREAD_FPR31(\thread)
.set pop
.endm

@@ -111,44 +111,44 @@

.macro fpu_restore_16even thread tmp=t0
lw \tmp, THREAD_FCR31(\thread)
- ldc1 $f0, THREAD_FPR0_LS64(\thread)
- ldc1 $f2, THREAD_FPR2_LS64(\thread)
- ldc1 $f4, THREAD_FPR4_LS64(\thread)
- ldc1 $f6, THREAD_FPR6_LS64(\thread)
- ldc1 $f8, THREAD_FPR8_LS64(\thread)
- ldc1 $f10, THREAD_FPR10_LS64(\thread)
- ldc1 $f12, THREAD_FPR12_LS64(\thread)
- ldc1 $f14, THREAD_FPR14_LS64(\thread)
- ldc1 $f16, THREAD_FPR16_LS64(\thread)
- ldc1 $f18, THREAD_FPR18_LS64(\thread)
- ldc1 $f20, THREAD_FPR20_LS64(\thread)
- ldc1 $f22, THREAD_FPR22_LS64(\thread)
- ldc1 $f24, THREAD_FPR24_LS64(\thread)
- ldc1 $f26, THREAD_FPR26_LS64(\thread)
- ldc1 $f28, THREAD_FPR28_LS64(\thread)
- ldc1 $f30, THREAD_FPR30_LS64(\thread)
+ ldc1 $f0, THREAD_FPR0(\thread)
+ ldc1 $f2, THREAD_FPR2(\thread)
+ ldc1 $f4, THREAD_FPR4(\thread)
+ ldc1 $f6, THREAD_FPR6(\thread)
+ ldc1 $f8, THREAD_FPR8(\thread)
+ ldc1 $f10, THREAD_FPR10(\thread)
+ ldc1 $f12, THREAD_FPR12(\thread)
+ ldc1 $f14, THREAD_FPR14(\thread)
+ ldc1 $f16, THREAD_FPR16(\thread)
+ ldc1 $f18, THREAD_FPR18(\thread)
+ ldc1 $f20, THREAD_FPR20(\thread)
+ ldc1 $f22, THREAD_FPR22(\thread)
+ ldc1 $f24, THREAD_FPR24(\thread)
+ ldc1 $f26, THREAD_FPR26(\thread)
+ ldc1 $f28, THREAD_FPR28(\thread)
+ ldc1 $f30, THREAD_FPR30(\thread)
ctc1 \tmp, fcr31
.endm

.macro fpu_restore_16odd thread
.set push
.set mips64r2
- ldc1 $f1, THREAD_FPR1_LS64(\thread)
- ldc1 $f3, THREAD_FPR3_LS64(\thread)
- ldc1 $f5, THREAD_FPR5_LS64(\thread)
- ldc1 $f7, THREAD_FPR7_LS64(\thread)
- ldc1 $f9, THREAD_FPR9_LS64(\thread)
- ldc1 $f11, THREAD_FPR11_LS64(\thread)
- ldc1 $f13, THREAD_FPR13_LS64(\thread)
- ldc1 $f15, THREAD_FPR15_LS64(\thread)
- ldc1 $f17, THREAD_FPR17_LS64(\thread)
- ldc1 $f19, THREAD_FPR19_LS64(\thread)
- ldc1 $f21, THREAD_FPR21_LS64(\thread)
- ldc1 $f23, THREAD_FPR23_LS64(\thread)
- ldc1 $f25, THREAD_FPR25_LS64(\thread)
- ldc1 $f27, THREAD_FPR27_LS64(\thread)
- ldc1 $f29, THREAD_FPR29_LS64(\thread)
- ldc1 $f31, THREAD_FPR31_LS64(\thread)
+ ldc1 $f1, THREAD_FPR1(\thread)
+ ldc1 $f3, THREAD_FPR3(\thread)
+ ldc1 $f5, THREAD_FPR5(\thread)
+ ldc1 $f7, THREAD_FPR7(\thread)
+ ldc1 $f9, THREAD_FPR9(\thread)
+ ldc1 $f11, THREAD_FPR11(\thread)
+ ldc1 $f13, THREAD_FPR13(\thread)
+ ldc1 $f15, THREAD_FPR15(\thread)
+ ldc1 $f17, THREAD_FPR17(\thread)
+ ldc1 $f19, THREAD_FPR19(\thread)
+ ldc1 $f21, THREAD_FPR21(\thread)
+ ldc1 $f23, THREAD_FPR23(\thread)
+ ldc1 $f25, THREAD_FPR25(\thread)
+ ldc1 $f27, THREAD_FPR27(\thread)
+ ldc1 $f29, THREAD_FPR29(\thread)
+ ldc1 $f31, THREAD_FPR31(\thread)
.set pop
.endm

diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c
index b1d84bd..2f8e0bb 100644
--- a/arch/mips/kernel/asm-offsets.c
+++ b/arch/mips/kernel/asm-offsets.c
@@ -167,72 +167,6 @@ void output_thread_fpu_defines(void)
OFFSET(THREAD_FPR30, task_struct, thread.fpu.fpr[30]);
OFFSET(THREAD_FPR31, task_struct, thread.fpu.fpr[31]);

- /* the least significant 64 bits of each FP register */
- OFFSET(THREAD_FPR0_LS64, task_struct,
- thread.fpu.fpr[0].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR1_LS64, task_struct,
- thread.fpu.fpr[1].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR2_LS64, task_struct,
- thread.fpu.fpr[2].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR3_LS64, task_struct,
- thread.fpu.fpr[3].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR4_LS64, task_struct,
- thread.fpu.fpr[4].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR5_LS64, task_struct,
- thread.fpu.fpr[5].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR6_LS64, task_struct,
- thread.fpu.fpr[6].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR7_LS64, task_struct,
- thread.fpu.fpr[7].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR8_LS64, task_struct,
- thread.fpu.fpr[8].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR9_LS64, task_struct,
- thread.fpu.fpr[9].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR10_LS64, task_struct,
- thread.fpu.fpr[10].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR11_LS64, task_struct,
- thread.fpu.fpr[11].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR12_LS64, task_struct,
- thread.fpu.fpr[12].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR13_LS64, task_struct,
- thread.fpu.fpr[13].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR14_LS64, task_struct,
- thread.fpu.fpr[14].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR15_LS64, task_struct,
- thread.fpu.fpr[15].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR16_LS64, task_struct,
- thread.fpu.fpr[16].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR17_LS64, task_struct,
- thread.fpu.fpr[17].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR18_LS64, task_struct,
- thread.fpu.fpr[18].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR19_LS64, task_struct,
- thread.fpu.fpr[19].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR20_LS64, task_struct,
- thread.fpu.fpr[20].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR21_LS64, task_struct,
- thread.fpu.fpr[21].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR22_LS64, task_struct,
- thread.fpu.fpr[22].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR23_LS64, task_struct,
- thread.fpu.fpr[23].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR24_LS64, task_struct,
- thread.fpu.fpr[24].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR25_LS64, task_struct,
- thread.fpu.fpr[25].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR26_LS64, task_struct,
- thread.fpu.fpr[26].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR27_LS64, task_struct,
- thread.fpu.fpr[27].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR28_LS64, task_struct,
- thread.fpu.fpr[28].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR29_LS64, task_struct,
- thread.fpu.fpr[29].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR30_LS64, task_struct,
- thread.fpu.fpr[30].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR31_LS64, task_struct,
- thread.fpu.fpr[31].val64[FPR_IDX(64, 0)]);
-
OFFSET(THREAD_FCR31, task_struct, thread.fpu.fcr31);
OFFSET(THREAD_MSA_CSR, task_struct, thread.fpu.msacsr);
BLANK();

--
2.0.4

Paul Burton

2014-09-24 09:45:42 UTC

Permalink

From: James Hogan <***@imgtec.com>

The maximum word size is 64-bits since MSA state is saved using st.d
which stores two 64-bit words, therefore reimplement FPR_IDX using xor,
and only within each 64-bit word.

Signed-off-by: James Hogan <***@imgtec.com>
Signed-off-by: Paul Burton <***@imgtec.com>
---
arch/mips/include/asm/processor.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h
index 05f0843..1c6f086 100644
--- a/arch/mips/include/asm/processor.h
+++ b/arch/mips/include/asm/processor.h
@@ -111,7 +111,7 @@ union fpureg {
#ifdef CONFIG_CPU_LITTLE_ENDIAN
# define FPR_IDX(width, idx) (idx)
#else
-# define FPR_IDX(width, idx) ((FPU_REG_WIDTH / (width)) - 1 - (idx))
+# define FPR_IDX(width, idx) ((idx) ^ ((64 / (width)) - 1))
#endif

#define BUILD_FPR_ACCESS(width) \

--
2.0.4

James Hogan

2014-09-24 13:00:46 UTC

Permalink

Post by Paul Burton
This series fixes a bunch of bugs, both build & runtime, with FP & MSA
support. Most of them only affect systems with the new FP modes & MSA
support enabled but patch 6 in particular is more general, fixing
problems for mips64 systems.

I don't claim to be particularly familiar with much of this code, but
FWIW I've read it through and haven't spotted anything wrong aside from
what has already been mentioned.

Thanks
James

Post by Paul Burton
Revert "MIPS: Don't assume 64-bit FP registers for context switch"
MIPS: MSA: Fix big-endian FPR_IDX implementation
MIPS: push .set arch=r4000 into the functions needing it
MIPS: assume at as source/dest of MSA copy/insert instructions
MIPS: remove MSA macro recursion
MIPS: wrap cfcmsa & ctcmsa accesses for toolchains with MSA support
MIPS: clear MSACSR cause bits when handling MSA FP exception
MIPS: fix mfc1 & mfhc1 emulation for mips64 systems
MIPS: ensure FCSR cause bits are clear after invoking FPU emulator
MIPS: prevent FP context set via ptrace being discarded
MIPS: disable FPU if the mode is unsupported
arch/mips/include/asm/asmmacro-32.h | 128 ++++++++++-----------
arch/mips/include/asm/asmmacro.h | 218 +++++++++++++++++++++---------------
arch/mips/include/asm/fpu.h | 19 ++--
arch/mips/include/asm/processor.h | 2 +-
arch/mips/kernel/asm-offsets.c | 66 -----------
arch/mips/kernel/genex.S | 11 +-
arch/mips/kernel/ptrace.c | 30 ++++-
arch/mips/kernel/r4k_fpu.S | 13 ++-
arch/mips/kernel/traps.c | 17 +--
arch/mips/math-emu/cp1emu.c | 6 +-
10 files changed, 262 insertions(+), 248 deletions(-)