Index: cgen/dev.scm =================================================================== RCS file: /cvs/src/src/cgen/dev.scm,v retrieving revision 1.8 diff -c -p -r1.8 dev.scm *** cgen/dev.scm 15 Feb 2005 09:01:35 -0000 1.8 --- cgen/dev.scm 15 Jun 2005 20:58:25 -0000 *************** *** 132,138 **** (load "sid-model") (load "sid-decode") (set! verbose-level 3) ! (set! APPLICATION 'SIMULATOR) ) (define (load-sim) --- 132,138 ---- (load "sid-model") (load "sid-decode") (set! verbose-level 3) ! (set! APPLICATION 'SID-SIMULATOR) ) (define (load-sim) Index: cgen/mach.scm =================================================================== RCS file: /cvs/src/src/cgen/mach.scm,v retrieving revision 1.8 diff -c -p -r1.8 mach.scm *** cgen/mach.scm 9 Oct 2003 20:49:24 -0000 1.8 --- cgen/mach.scm 15 Jun 2005 20:58:25 -0000 *************** *** 1155,1167 **** ; Allow a cpu family to override the isa parallel-insns spec. ; ??? Concession to the m32r port which can go away, in time. parallel-insns ) nil) ) ; Accessors. ! (define-getters cpu (word-bitsize insn-chunk-bitsize file-transform parallel-insns)) ; Return endianness of instructions. --- 1155,1173 ---- ; Allow a cpu family to override the isa parallel-insns spec. ; ??? Concession to the m32r port which can go away, in time. parallel-insns + + ; Computed: maximum number of insns which may pass before + ; an insn writes back its output operands. + max-delay + ) nil) ) ; Accessors. ! (define-getters cpu (word-bitsize insn-chunk-bitsize file-transform parallel-insns max-delay)) ! (define-setters cpu (max-delay)) ; Return endianness of instructions. *************** *** 1211,1217 **** word-bitsize insn-chunk-bitsize file-transform ! parallel-insns) (begin (logit 2 "Ignoring " name ".\n") #f))) ; cpu is not to be kept --- 1217,1225 ---- word-bitsize insn-chunk-bitsize file-transform ! parallel-insns ! 0 ; default max-delay. will compute correct value !
) (begin (logit 2 "Ignoring " name ".\n") #f))) ; cpu is not to be kept Index: cgen/operand.scm =================================================================== RCS file: /cvs/src/src/cgen/operand.scm,v retrieving revision 1.9 diff -c -p -r1.9 operand.scm *** cgen/operand.scm 16 Feb 2005 19:45:36 -0000 1.9 --- cgen/operand.scm 15 Jun 2005 20:58:25 -0000 *************** *** 90,95 **** --- 90,98 ---- ; referenced. #f means the operand is always referenced by ; the instruction. (cond? . #f) + + ; whether (and by how much) this instance of the operand is delayed. + (delayed . #f) ) nil) ) *************** *** 135,140 **** --- 138,145 ---- (define op:set-num! (elm-make-setter 'num)) (define op:cond? (elm-make-getter 'cond?)) (define op:set-cond?! (elm-make-setter 'cond?)) + (define op:delay (elm-make-getter 'delayed)) + (define op:set-delay! (elm-make-setter 'delayed)) ; Compute the hardware type lazily. ; FIXME: op:type should be named op:hwtype or some such. Index: cgen/rtl-c.scm =================================================================== RCS file: /cvs/src/src/cgen/rtl-c.scm,v retrieving revision 1.5 diff -c -p -r1.5 rtl-c.scm *** cgen/rtl-c.scm 16 Jul 2003 05:35:47 -0000 1.5 --- cgen/rtl-c.scm 15 Jun 2005 20:58:26 -0000 *************** *** 1305,1311 **** "bad arg to `operand'" object-or-name))) ) ! (define-fn xop (estate options mode object) object) (define-fn local (estate options mode object-or-name) (cond ((rtx-temp? object-or-name) --- 1305,1327 ---- "bad arg to `operand'" object-or-name))) ) ! (define-fn xop (estate options mode object) ! (let ((delayed (assoc '#:delay (estate-modifiers estate)))) ! (if (and delayed ! (equal? APPLICATION 'SID-SIMULATOR) ! (operand? object)) ! ;; if we're looking at an operand inside a (delay ...) rtx, then we ! ;; are talking about a _delayed_ operand, which is a different ! ;; beast. rather than try to work out what context we were ! ;; constructed within, we just clone the operand instance and set ! 
;; the new one to have a delayed value. the setters and getters ! ;; will work it out. ! (let ((obj (object-copy object)) ! (amount (cadr delayed))) ! (op:set-delay! obj amount) ! obj) ! ;; else return the normal object ! object))) (define-fn local (estate options mode object-or-name) (cond ((rtx-temp? object-or-name) *************** *** 1364,1372 **** (cx:make VOID "; /*clobber*/\n") ) ! (define-fn delay (estate options mode n rtx) ! (s-sequence (estate-with-modifiers estate '((#:delay))) VOID '() rtx) ; wip! ! ) ; Gets expanded as a macro. ;(define-fn annul (estate yes?) --- 1380,1417 ---- (cx:make VOID "; /*clobber*/\n") ) ! ! (define-fn delay (estate options mode num-node rtx) ! (case APPLICATION ! ((SID-SIMULATOR) ! (let* ((n (cadddr num-node)) ! (old-delay (let ((old (assoc '#:delay (estate-modifiers estate)))) ! (if old (cadr old) 0))) ! (new-delay (+ n old-delay))) ! (begin ! ;; check for proper usage ! (if (let* ((hw (case (car rtx) ! ((operand) (op:type (rtx-operand-obj rtx))) ! ((xop) (op:type (rtx-xop-obj rtx))) ! (else #f)))) ! (not (and hw (or (pc? hw) (memory? hw) (register? hw))))) ! (context-error ! (estate-context estate) ! (string-append ! "(delay ...) rtx applied to wrong type of operand '" (car rtx) "'. should be pc, register or memory"))) ! ;; signal an error if we're delayed and not in a "parallel-insns" CPU ! (if (not (with-parallel?)) ! (context-error ! (estate-context estate) ! "delayed operand in a non-parallel cpu")) ! ;; update cpu-global pipeline bound ! (cpu-set-max-delay! (current-cpu) (max (cpu-max-delay (current-cpu)) new-delay)) ! ;; pass along new delay to embedded rtx ! (rtx-eval-with-estate rtx mode (estate-with-modifiers estate `((#:delay ,new-delay))))))) ! ! ;; not in sid-land ! (else (s-sequence (estate-with-modifiers estate '((#:delay))) VOID '() rtx)))) ! ; Gets expanded as a macro. ;(define-fn annul (estate yes?) 
Index: cgen/sid-cpu.scm =================================================================== RCS file: /cvs/src/src/cgen/sid-cpu.scm,v retrieving revision 1.11 diff -c -p -r1.11 sid-cpu.scm *** cgen/sid-cpu.scm 16 Jul 2003 05:35:47 -0000 1.11 --- cgen/sid-cpu.scm 15 Jun 2005 20:58:26 -0000 *************** *** 1,5 **** ; CPU family related simulator generator, excluding decoding and model support. ! ; Copyright (C) 2000, 2002 Red Hat, Inc. ; This file is part of CGEN. ; *********** --- 1,5 ---- ; CPU family related simulator generator, excluding decoding and model support. ! ; Copyright (C) 2000, 2002, 2003, 2005 Red Hat, Inc. ; This file is part of CGEN. ; *********** *************** namespace @arch@ { *** 199,204 **** --- 199,282 ---- (-gen-hardware-struct #f (find hw-need-storage? (current-hw-list)))) ) + (define (-gen-hw-stream-and-destream-fns) + (let* ((sa string-append) + (regs (find hw-need-storage? (current-hw-list))) + (reg-dim (lambda (r) + (let ((dims (-hw-vector-dims r))) + (if (equal? 0 (length dims)) + "0" + (number->string (car dims)))))) + (write-stacks + (map (lambda (n) (sa n "_writes")) + (append (map (lambda (r) (gen-c-symbol (obj:name r))) regs) + (map (lambda (m) (sa m "_memory")) useful-mode-names)))) + (stream-reg (lambda (r) + (let ((rname (sa "hardware." (gen-c-symbol (obj:name r))))) + (if (hw-scalar? r) + (sa " ost << " rname " << ' ';\n") + (sa " for (int i = 0; i < " (reg-dim r) + "; i++)\n ost << " rname "[i] << ' ';\n"))))) + (destream-reg (lambda (r) + (let ((rname (sa "hardware." (gen-c-symbol (obj:name r))))) + (if (hw-scalar? r) + (sa " ist >> " rname ";\n") + (sa " for (int i = 0; i < " (reg-dim r) + "; i++)\n ist >> " rname "[i];\n"))))) + (stream-stacks (lambda (s) (sa " stream_stacks ( stacks." s ", ost);\n"))) + (destream-stacks (lambda (s) (sa " destream_stacks ( stacks." 
s ", ist);\n"))) + (stack-boilerplate + (sa + " template <typename ST> \n" + " void stream_stacks (const ST &st, std::ostream &ost) const\n" + " {\n" + " for (int i = 0; i < @prefix@::pipe_sz; i++)\n" + " {\n" + " ost << st[i].t << ' ';\n" + " for (int j = 0; j <= st[i].t; j++)\n" + " {\n" + " ost << st[i].buf[j].pc << ' ';\n" + " ost << st[i].buf[j].val << ' ';\n" + " ost << st[i].buf[j].idx0 << ' ';\n" + " }\n" + " }\n" + " }\n" + " \n" + " template <typename ST> \n" + " void destream_stacks (ST &st, std::istream &ist)\n" + " {\n" + " for (int i = 0; i < @prefix@::pipe_sz; i++)\n" + " {\n" + " ist >> st[i].t;\n" + " for (int j = 0; j <= st[i].t; j++)\n" + " {\n" + " ist >> st[i].buf[j].pc;\n" + " ist >> st[i].buf[j].val;\n" + " ist >> st[i].buf[j].idx0;\n" + " }\n" + " }\n" + " }\n" + " \n"))) + (sa + " void stream_cgen_hardware (std::ostream &ost) const \n {\n" + (string-map stream-reg regs) + " }\n" + " void destream_cgen_hardware (std::istream &ist) \n {\n" + (string-map destream-reg regs) + " }\n" + (if (with-parallel?) + (sa stack-boilerplate + " void stream_cgen_write_stacks (std::ostream &ost, " + "const @prefix@::write_stacks &stacks) const \n {\n" + (string-map stream-stacks write-stacks) + " }\n" + " void destream_cgen_write_stacks (std::istream &ist, " + "@prefix@::write_stacks &stacks) \n {\n" + (string-map destream-stacks write-stacks) + " }\n") + "")))) + + ; Generate <cpu>-cpu.h (define (cgen-cpu.h) *************** public: *** 222,227 **** --- 300,307 ---- -gen-hardware-types + -gen-hw-stream-and-destream-fns + " // C++ register access function templates\n" "#define current_cpu this\n\n" (lambda () *************** typedef struct { *** 295,362 **** ) ) ! ; Utility of gen-parallel-exec-type to generate the definition of one ! ; structure in PAREXEC. ! ; SFMT is an <sformat> object. ! ! (define (gen-parallel-exec-elm sfmt) ! (string-append ! " struct { /* " (obj:comment sfmt) " */\n" ! (let ((sem-ops ! ((if (with-parallel-write?) sfmt-out-ops sfmt-in-ops) sfmt))) ! (if (null? sem-ops) !
" int empty;\n" ! (string-map ! (lambda (op) ! (logit 2 "Processing operand " (obj:name op) " of format " ! (obj:name sfmt) " ...\n") ! (if (with-parallel-write?) ! (let ((index-type (and (op-save-index? op) ! (gen-index-type op sfmt)))) ! (string-append " " (gen-type op) ! " " (gen-sym op) ";\n" ! (if index-type ! (string-append " " index-type ! " " (gen-sym op) "_idx;\n") ! ""))) ! (string-append " " ! (gen-type op) ! " " ! (gen-sym op) ! ";\n"))) ! sem-ops))) ! " } " (gen-sym sfmt) ";\n" ! ) ! ) ; Generate the definition of the structure that holds register values, etc. ! ; for use during parallel execution. When instructions are executed parallelly ! ; either ! ; - their inputs are read before their outputs are written. Thus we have to ! ; fetch the input values of several instructions before executing any of them. ! ; - or their outputs are queued here first and then written out after all insns ! ; have executed. ! ; The fetched/queued values are stored in an array of PAREXEC structs, one ! ; element per instruction. ! (define (gen-parallel-exec-type) ! (logit 2 "Generating PAREXEC type ...\n") ! (string-append ! (if (with-parallel-write?) ! "/* Queued output values of an instruction. */\n" ! "/* Fetched input values of an instruction. */\n") ! "\ ! struct @prefix@_parexec { ! union {\n" ! (string-map gen-parallel-exec-elm (current-sfmt-list)) ! "\ ! } operands; ! /* For conditionally written operands, bitmask of which ones were. */ ! unsigned long long written; ! };\n\n" ! ) ! ) ; Generate the TRACE_RECORD struct definition. --- 375,524 ---- ) ) ! ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ! ;;; begin stack-based write schedule ! ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ! ! (define useful-mode-names '(BI QI HI SI DI UQI UHI USI UDI SF DF)) ! ! (define (-calculated-memory-write-buffer-size) ! (let* ((is-mem? (lambda (op) (eq? (hw-sem-name (op:type op)) 'h-memory))) ! (count-mem-writes ! (lambda (sfmt) (length (find is-mem? (sfmt-out-ops sfmt)))))) ! 
(apply max (append '(0) (map count-mem-writes (current-sfmt-list)))))) ! ! ! ;; note: this doesn't really correctly approximate the worst case. user-supplied functions ! ;; might rewrite the pipeline extensively while it's running. ! ;(define (-worst-case-number-of-writes-to hw-name) ! ; (let* ((sfmts (current-sfmt-list)) ! ; (out-ops (map sfmt-out-ops sfmts)) ! ; (pred (lambda (op) (equal? hw-name (gen-c-symbol (obj:name (op:type op)))))) ! ; (filtered-ops (map (lambda (ops) (find pred ops)) out-ops))) ! ; (apply max (cons 0 (map (lambda (ops) (length ops)) filtered-ops))))) ! ! (define (-hw-gen-write-stack-decl nm mode) ! (let* ( ! ; for the time being, we're disabling this size-estimation stuff and just ! ; requiring the user to supply a parameter WRITE_BUF_SZ before they include -defs.h ! ; (pipe-sz (+ 1 (max-delay (cpu-max-delay (current-cpu))))) ! ; (sz (* pipe-sz (-worst-case-number-of-writes-to nm)))) ! ! (mode-pad (spaces (- 4 (string-length mode)))) ! (stack-name (string-append nm "_writes"))) ! (string-append ! " write_stack< write<" mode "> >" mode-pad "\t" stack-name "\t[pipe_sz];\n"))) ! ! ! (define (-hw-gen-write-struct-decl) ! (let* ((dims (-worst-case-index-dims)) ! (sa string-append) ! (ns number->string) ! (idxs (iota dims)) ! (ctor (sa "write (PCADDR _pc, MODE _val" ! (string-map (lambda (x) (sa ", USI _idx" (ns x) "=0")) idxs) ! ") : pc(_pc), val(_val)" ! (string-map (lambda (x) (sa ", idx" (ns x) "(_idx" (ns x) ")")) idxs) ! " {} \n")) ! (idx-fields (string-map (lambda (x) (sa " USI idx" (ns x) ";\n")) idxs))) ! (sa ! "\n\n" ! " template <typename MODE> \n" ! " struct write\n" ! " {\n" ! " USI pc;\n" ! " MODE val;\n" ! idx-fields ! " " ctor ! " write() {}\n" ! " };\n" ))) ! ! (define (-hw-vector-dims hw) (elm-get (hw-type hw) 'dimensions)) ! (define (-worst-case-index-dims) ! (apply max ! (append '(1) ; for memory accesses ! (map (lambda (hw) (length (-hw-vector-dims hw))) ! (find (lambda (hw) (not (scalar? hw))) (current-hw-list)))))) ! ! !
(define (-gen-writestacks) ! (let* ((hw (find register? (current-hw-list))) ! (modes useful-mode-names) ! (hw-pairs (map (lambda (h) (list (gen-c-symbol (obj:name h)) ! (obj:name (hw-mode h)))) ! hw)) ! (mem-pairs (map (lambda (m) (list (string-append (symbol->string m) ! "_memory") m)) ! modes)) ! (all-pairs (append mem-pairs hw-pairs)) ! ! (h1 "\n\n// write stacks used in parallel execution\n\n struct write_stacks\n {\n // types of stacks\n\n") ! (wb (string-append ! "\n\n // unified writeback function (defined in @prefix@-write.cc)" ! "\n void writeback (int tick, @cpu@::@cpu@_cpu* current_cpu);" ! "\n // unified write-stack clearing function (defined in @prefix@-write.cc)" ! "\n void reset ();")) ! (zz "\n\n }; // end struct @prefix@::write_stacks \n\n")) ! (string-append ! (-hw-gen-write-struct-decl) ! (foldl (lambda (s pair) (string-append s (apply -hw-gen-write-stack-decl pair))) h1 all-pairs) ! wb ! zz))) ! ! ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ! ;;; end stack-based write schedule ! ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ! ; Generate the definition of the structure that holds register values, etc. ! ; for use during parallel execution. ! (define (gen-write-stack-structure) ! (let ((membuf-sz (-calculated-memory-write-buffer-size)) ! (max-delay (cpu-max-delay (current-cpu)))) ! (logit 2 "Generating write stack structure ...\n") ! (string-append ! " static const int max_delay = " ! (number->string max-delay) ";\n" ! " static const int pipe_sz = " ! (number->string (+ 1 max-delay)) "; // max_delay + 1\n" ! " ! template <typename ELT> ! struct write_stack ! { ! int t; ! const int sz; ! ELT buf[WRITE_BUF_SZ]; ! ! write_stack () : t(-1), sz(WRITE_BUF_SZ) {} ! inline bool empty () { return (t == -1); } ! inline void clear () { t = -1; } ! inline void pop () { if (t > -1) t--;} ! inline void push (const ELT &e) { if (t+1 < sz) buf [++t] = e;} ! inline ELT &top () { return buf [t>0 ? ( t<sz ? t : sz-1) : 0];} ! }; ! ! template <typename STKS, typename VAL> ! inline VAL lookahead (int dist, int base, STKS &st, VAL def, int idx=0) ! { !
for (; dist > 0; --dist) ! { ! write_stack <VAL> &v = st [(base + dist) % pipe_sz]; ! for (int i = v.t; i > 0; --i) ! if (v.buf [i].idx0 == idx) return v.buf [i]; ! } ! return def; ! } ! ! " ! ! (-gen-writestacks) ! ))) ; Generate the TRACE_RECORD struct definition. *************** typedef struct @prefix@_trace_record { *** 392,406 **** #ifndef DEFS_@PREFIX@_H #define DEFS_@PREFIX@_H namespace @cpu@ { ! \n" ! (if (with-parallel?) ! gen-parallel-exec-type ! "") ! "\ ! } // end @cpu@ namespace #endif /* DEFS_@PREFIX@_H */\n" ) --- 554,580 ---- #ifndef DEFS_@PREFIX@_H #define DEFS_@PREFIX@_H + ") + (if (with-parallel?) + (string-write "\ + #include <stack> + #include \"cgen-types.h\" + + // forward declaration\n\n namespace @cpu@ { ! struct @cpu@_cpu; ! } ! namespace @prefix@ { ! using namespace cgen; ! ! " ! gen-write-stack-structure ! "\ ! } // end @prefix@ namespace ! ")) ! (string-write "\ #endif /* DEFS_@PREFIX@_H */\n" ) *************** namespace @cpu@ { *** 417,463 **** ; Return C code to fetch and save all output operands to instructions with ; SFMT. - (define (-gen-write-args sfmt) - (string-map (lambda (op) (op:write op sfmt)) - (sfmt-out-ops sfmt)) - ) - - ; Utility of gen-write-fns to generate a writer function for SFMT. ! (define (-gen-write-fn sfmt) ! (logit 2 "Processing write function for \"" (obj:name sfmt) "\" ...\n") ! (string-list ! "\nsem_status\n" ! (-gen-write-fn-name sfmt) " (@cpu@_cpu* current_cpu, @prefix@_scache* sem, @prefix@_parexec* par_exec)\n" ! "{\n" ! (if (with-scache?) ! (gen-define-field-macro sfmt) ! "") ! (gen-define-parallel-operand-macro sfmt) ! " @prefix@_scache* abuf = sem;\n" ! " unsigned long long written = abuf->written;\n" ! " PCADDR pc = abuf->addr;\n" ! " PCADDR npc = 0; // dummy value for branches\n" ! " sem_status status = SEM_STATUS_NORMAL; // ditto\n" ! "\n" ! (-gen-write-args sfmt) ! "\n" ! " return status;\n" ! (gen-undef-parallel-operand-macro sfmt) ! (if (with-scache?) ! (gen-undef-field-macro sfmt) ! "") ! "}\n\n") ! ) !
(define (-gen-write-fns) ! (logit 2 "Processing writer functions ...\n") ! (string-write-map (lambda (sfmt) (-gen-write-fn sfmt)) ! (current-sfmt-list)) ! ) ! ; Generate <cpu>-write.cxx. (define (cgen-write.cxx) (logit 1 "Generating " (gen-cpu-name) " write.cxx ...\n") --- 591,670 ---- ; Return C code to fetch and save all output operands to instructions with ; SFMT. ! ; Generate <cpu>-write.cxx. ! (define (-gen-register-writer nm mode dims) ! (let* ((pad " ") ! (sa string-append) ! (mode (symbol->string mode)) ! (idx-args (string-map (lambda (x) (sa "w.idx" (number->string x) ", ")) ! (iota dims)))) ! (sa pad "while (! " nm "_writes[tick].empty())\n" ! pad "{\n" ! pad " write<" mode "> &w = " nm "_writes[tick].top();\n" ! pad " current_cpu->" nm "_set(" idx-args "w.val);\n" ! pad " " nm "_writes[tick].pop();\n" ! pad "}\n\n"))) ! ! (define (-gen-memory-writer nm mode dims) ! (let* ((pad " ") ! (sa string-append) ! (mode (symbol->string mode)) ! (idx-args (string-map (lambda (x) (sa ", w.idx" (number->string x) "")) ! (iota dims)))) ! (sa pad "while (! " nm "_writes[tick].empty())\n" ! pad "{\n" ! pad " write<" mode "> &w = " nm "_writes[tick].top();\n" ! pad " current_cpu->SETMEM" mode " (w.pc" idx-args ", w.val);\n" ! pad " " nm "_writes[tick].pop();\n" ! pad "}\n\n"))) ! ! ! (define (-gen-reset-fn) ! (let* ((sa string-append) ! (objs (append (map (lambda (h) (gen-c-symbol (obj:name h))) ! (find register? (current-hw-list))) ! (map (lambda (m) (sa (symbol->string m) "_memory")) ! useful-mode-names))) ! (clr (lambda (elt) (sa " clear_stacks (" elt "_writes);\n")))) ! (sa ! " template <typename ST> \n" ! " static void clear_stacks (ST &st)\n" ! " {\n" ! " for (int i = 0; i < @prefix@::pipe_sz; i++)\n" ! " st[i].clear();\n" ! " }\n\n" ! " void @prefix@::write_stacks::reset ()\n {\n" ! (string-map clr objs) ! " }"))) ! ! (define (-gen-unified-write-fn) ! (let* ((hw (find register? (current-hw-list))) ! (modes useful-mode-names) !
(hw-triples (map (lambda (h) (list (gen-c-symbol (obj:name h)) ! (obj:name (hw-mode h)) ! (length (-hw-vector-dims h)))) ! hw)) ! (mem-triples (map (lambda (m) (list (string-append (symbol->string m) ! "_memory") ! m 1)) ! modes))) + (logit 2 "Generating writer function ...\n") + (string-append + " ! void @prefix@::write_stacks::writeback (int tick, @cpu@::@cpu@_cpu* current_cpu) ! { ! " ! "\n // register writeback loops\n" ! (string-map (lambda (t) (apply -gen-register-writer t)) hw-triples) ! "\n // memory writeback loops\n" ! (string-map (lambda (t) (apply -gen-memory-writer t)) mem-triples) ! " ! } ! "))) (define (cgen-write.cxx) (logit 1 "Generating " (gen-cpu-name) " write.cxx ...\n") *************** namespace @cpu@ { *** 479,488 **** "\ #include \"@cpu@.h\" - using namespace @cpu@; " ! -gen-write-fns ) ) --- 686,695 ---- "\ #include \"@cpu@.h\" " ! -gen-reset-fn ! -gen-unified-write-fn ) ) *************** using namespace @cpu@; *** 522,540 **** "sem_status\n") "@prefix@_sem_" (gen-sym insn) (if (with-parallel?) ! " (@cpu@_cpu* current_cpu, @prefix@_scache* sem, @prefix@_parexec* par_exec)\n" " (@cpu@_cpu* current_cpu, @prefix@_scache* sem)\n") "{\n" (gen-define-field-macro (insn-sfmt insn)) - (if (with-parallel?) - (gen-define-parallel-operand-macro (insn-sfmt insn)) - "") " sem_status status = SEM_STATUS_NORMAL;\n" " @prefix@_scache* abuf = sem;\n" ! ; Unconditionally written operands are not recorded here. ! (if (or (with-profile?) (with-parallel-write?)) ! " unsigned long long written = 0;\n" ! "") ; The address of this insn, needed by extraction and semantic code. ; Note that the address recorded in the cpu state struct is not used. ; For faster engines that copy will be out of date. --- 729,742 ---- "sem_status\n") "@prefix@_sem_" (gen-sym insn) (if (with-parallel?) ! (string-append " (@cpu@_cpu* current_cpu, @prefix@_scache* sem, const int tick, \n\t" ! 
"@prefix@::write_stacks &buf)\n") " (@cpu@_cpu* current_cpu, @prefix@_scache* sem)\n") "{\n" (gen-define-field-macro (insn-sfmt insn)) " sem_status status = SEM_STATUS_NORMAL;\n" " @prefix@_scache* abuf = sem;\n" ! ; The address of this insn, needed by extraction and semantic code. ; Note that the address recorded in the cpu state struct is not used. ; For faster engines that copy will be out of date. *************** using namespace @cpu@; *** 543,567 **** "\n" (gen-semantic-code insn) "\n" - ; Only update what's been written if some are conditionally written. - ; Otherwise we know they're all written so there's no point in - ; keeping track. - (if (or (with-profile?) (with-parallel-write?)) - (if (-any-cond-written? (insn-sfmt insn)) - " abuf->written = written;\n" - "") - "") (if cti? " current_cpu->done_cti_insn (npc, status);\n" " current_cpu->done_insn (npc, status);\n") (if (with-parallel?) "" " return status;\n") - (if (with-parallel?) - (gen-undef-parallel-operand-macro (insn-sfmt insn)) - "") (gen-undef-field-macro (insn-sfmt insn)) ! "}\n\n" )) ) --- 745,758 ---- "\n" (gen-semantic-code insn) "\n" (if cti? " current_cpu->done_cti_insn (npc, status);\n" " current_cpu->done_insn (npc, status);\n") (if (with-parallel?) "" " return status;\n") (gen-undef-field-macro (insn-sfmt insn)) ! "}\n\n" )) ) *************** using namespace @cpu@; // FIXME: namespa *** 659,667 **** (if (with-scache?) (gen-define-field-macro (insn-sfmt insn)) "") - (if parallel? - (gen-define-parallel-operand-macro (insn-sfmt insn)) - "") ; Unconditionally written operands are not recorded here. (if (or (with-profile?) (with-parallel-write?)) " unsigned long long written = 0;\n" --- 850,855 ---- *************** using namespace @cpu@; // FIXME: namespa *** 698,706 **** (string-append " pbb_br_npc = npc;\n" " pbb_br_status = br_status;\n") "") - (if parallel? - (gen-undef-parallel-operand-macro (insn-sfmt insn)) - "") (if (with-scache?) 
(gen-undef-field-macro (insn-sfmt insn)) "") --- 886,891 ---- *************** struct @prefix@_pbb_label { *** 954,962 **** " vpc = vpc + 1;\n") "") (gen-define-field-macro (sfrag-sfmt frag)) - (if parallel? - (gen-define-parallel-operand-macro (sfrag-sfmt frag)) - "") ; Unconditionally written operands are not recorded here. (if (or (with-profile?) (with-parallel-write?)) " unsigned long long written = 0;\n" --- 1139,1144 ---- *************** struct @prefix@_pbb_label { *** 997,1005 **** (string-append " pbb_br_npc = npc;\n" " pbb_br_status = br_status;\n") "") - (if parallel? - (gen-undef-parallel-operand-macro (sfrag-sfmt frag)) - "") (gen-undef-field-macro (sfrag-sfmt frag)) " }\n" (if (sfrag-trailer? frag) --- 1179,1184 ---- Index: cgen/sid-decode.scm =================================================================== RCS file: /cvs/src/src/cgen/sid-decode.scm,v retrieving revision 1.14 diff -c -p -r1.14 sid-decode.scm *** cgen/sid-decode.scm 26 Oct 2003 19:40:30 -0000 1.14 --- cgen/sid-decode.scm 15 Jun 2005 20:58:26 -0000 *************** bool @prefix@_idesc::idesc_table_initial *** 47,56 **** (if pbb? "0, " (string-append (-gen-sem-fn-name insn) ", ")) ! "") ! (if (with-parallel?) ! (string-append (-gen-write-fn-name sfmt) ", ") ! "") "\"" (string-upcase name) "\", " (gen-cpu-insn-enum (current-cpu) insn) ", " --- 47,53 ---- (if pbb? "0, " (string-append (-gen-sem-fn-name insn) ", ")) ! 
"") "\"" (string-upcase name) "\", " (gen-cpu-insn-enum (current-cpu) insn) ", " *************** bool @prefix@_idesc::idesc_table_initial *** 131,155 **** ) - ;; and the same for writeback functions - - (define (-gen-write-fn-name sfmt) - (string-append "@prefix@_write_" (gen-sym sfmt)) - ) - - - (define (-gen-write-fn-decls) - (string-write - "// Decls of each writeback fn.\n\n" - "using @cpu@::@prefix@_write_fn;\n" - (string-list-map (lambda (sfmt) - (string-list "extern @prefix@_write_fn " - (-gen-write-fn-name sfmt) - ";\n")) - (current-sfmt-list)) - "\n" - ) - ) ; idesc, argbuf, and scache types --- 128,133 ---- *************** struct @cpu@_cpu; *** 164,177 **** struct @prefix@_scache; " (if (with-parallel?) ! "struct @prefix@_parexec;\n" "") ! (if (with-parallel?) ! "typedef void (@prefix@_sem_fn) (@cpu@_cpu* cpu, @prefix@_scache* sem, @prefix@_parexec* par_exec);" "typedef sem_status (@prefix@_sem_fn) (@cpu@_cpu* cpu, @prefix@_scache* sem);") "\n" - (if (with-parallel?) - "typedef sem_status (@prefix@_write_fn) (@cpu@_cpu* cpu, @prefix@_scache* sem, @prefix@_parexec* par_exec);" - "") "\n" " // Instruction descriptor. --- 142,150 ---- struct @prefix@_scache; " (if (with-parallel?) ! "typedef void (@prefix@_sem_fn) (@cpu@_cpu* cpu, @prefix@_scache* sem, int tick, @prefix@::write_stacks &buf);" "typedef sem_status (@prefix@_sem_fn) (@cpu@_cpu* cpu, @prefix@_scache* sem);") "\n" "\n" " // Instruction descriptor. *************** struct @prefix@_idesc { *** 192,203 **** @prefix@_sem_fn* execute;\n\n" "") - (if (with-parallel?) - "\ - // scache write executor for this insn - @prefix@_write_fn* writeback;\n\n" - "") - "\ const char* insn_name; enum @prefix@_insn_type sem_index; --- 165,170 ---- *************** struct @prefix@_scache { *** 300,306 **** // argument buffer @prefix@_sem_fields fields; ! " (if (or (with-any-profile?) 
(with-parallel-write?)) (string-append " // writeback flags // Only used if profiling or parallel execution support enabled during --- 267,273 ---- // argument buffer @prefix@_sem_fields fields; ! " (if (with-any-profile?) (string-append " // writeback flags // Only used if profiling or parallel execution support enabled during *************** struct @prefix@_scache { *** 549,555 **** (let ((in-ops (find op-profilable? (sfmt-in-ops sfmt))) (out-ops (find op-profilable? (sfmt-out-ops sfmt))) ) ! (if (and (null? in-ops) (null? out-ops)) "" (string-list " /* Record the fields for profiling. */\n" --- 516,522 ---- (let ((in-ops (find op-profilable? (sfmt-in-ops sfmt))) (out-ops (find op-profilable? (sfmt-out-ops sfmt))) ) ! (if (or (not (with-any-profile?)) (and (null? in-ops) (null? out-ops))) "" (string-list " /* Record the fields for profiling. */\n" *************** void *** 714,719 **** --- 681,697 ---- #ifndef @PREFIX@_DECODE_H #define @PREFIX@_DECODE_H + " + (if (with-parallel?) + "\ + namespace @prefix@ { + // forward declaration of struct in -defs.h + struct write_stacks; + } + + " + "") + "\ namespace @cpu@ { using namespace cgen; *************** typedef UINT @prefix@_insn_word; *** 737,746 **** -gen-sem-fn-decls "") - (if (with-parallel?) - -gen-write-fn-decls - "") - "\ #endif /* @PREFIX@_DECODE_H */\n" ) --- 715,720 ---- Index: cgen/sid.scm =================================================================== RCS file: /cvs/src/src/cgen/sid.scm,v retrieving revision 1.14 diff -c -p -r1.14 sid.scm *** cgen/sid.scm 17 May 2005 01:47:16 -0000 1.14 --- cgen/sid.scm 15 Jun 2005 20:58:26 -0000 *************** *** 931,973 **** (rtl-c++ INT yes? nil #:rtl-cover-fns? #t))) ) - ; For parallel write post-processing, we don't want to defer setting the pc. - ; ??? Not sure anymore. - ;(method-make! - ; 'gen-set-quiet - ; (lambda (self estate mode index selector newval) - ; (-op-gen-set-quiet self estate mode index selector newval))) - ;(method-make! 
- ; 'gen-set-trace - ; (lambda (self estate mode index selector newval) - ; (-op-gen-set-trace self estate mode index selector newval))) - - ; Name of C macro to access parallel execution operand support. - - (define -par-operand-macro "OPRND") - - ; Return C code to fetch an operand's value and save it away for the - ; semantic handler. This is used to handle parallel execution of several - ; instructions where all inputs of all insns are read before any outputs are - ; written. - ; For operands, the word `read' is only used in this context. - - (define (op:read op sfmt) - (let ((estate (estate-make-for-normal-rtl-c++ nil nil))) - (send op 'gen-read estate sfmt -par-operand-macro)) - ) - - ; Return C code to write an operand's value. - ; This is used to handle parallel execution of several instructions where all - ; outputs are written to temporary spots first, and then a final - ; post-processing pass is run to update cpu state. - ; For operands, the word `write' is only used in this context. - - (define (op:write op sfmt) - (let ((estate (estate-make-for-normal-rtl-c++ nil nil))) - (send op 'gen-write estate sfmt -par-operand-macro)) - ) - ; Default gen-read method. ; This is used to help support targets with parallel insns. ; Either this or gen-write (but not both) is used. --- 931,936 ---- *************** *** 1017,1052 **** (method-make! 'cxmake-get (lambda (self estate mode index selector) ! (let ((mode (if (mode:eq? 'DFLT mode) ! (send self 'get-mode) ! mode)) ! (index (if index index (op:index self))) ! (selector (if selector selector (op:selector self)))) ! ; If the object is marked with the RAW attribute, access the hardware ! ; object directly. (logit 4 " cxmake-get self=" (obj:name self) " mode=" (obj:name mode) " index=" (obj:name index) " selector=" selector "\n") ! (cond ((obj-has-attr? self 'RAW) ! (send (op:type self) 'cxmake-get-raw estate mode index selector)) ! ; If the instruction could be parallely executed with others and ! 
; we're doing read pre-processing, the operand has already been ! ; fetched, we just have to grab the cached value. ! ((with-parallel-read?) ! (cx:make-with-atlist mode ! (string-append -par-operand-macro ! " (" (gen-sym self) ")") ! nil)) ; FIXME: want CACHED attr if present ! ((op:getter self) ! (let ((args (car (op:getter self))) ! (expr (cadr (op:getter self)))) ! (rtl-c-expr mode expr ! (if (= (length args) 0) ! nil ! (list (list (car args) 'UINT index))) ! #:rtl-cover-fns? #t ! #:output-language (estate-output-language estate)))) ! (else ! (send (op:type self) 'cxmake-get estate mode index selector))))) ) --- 980,1024 ---- (method-make! 'cxmake-get (lambda (self estate mode index selector) ! (let* ((mode (if (mode:eq? 'DFLT mode) ! (send self 'get-mode) ! mode)) ! (hw (op:type self)) ! (index (if index index (op:index self))) ! (idx (if index (-gen-hw-index index estate) "")) ! (idx-args (if (equal? idx "") "" (string-append ", " idx))) ! (selector (if selector selector (op:selector self))) ! (delayval (op:delay self)) ! (md (mode:c-type mode)) ! (name (if ! (eq? (obj:name hw) 'h-memory) ! (string-append md "_memory") ! (gen-c-symbol (obj:name hw)))) ! (getter (op:getter self)) ! (def-val (cond ((obj-has-attr? self 'RAW) ! (send hw 'cxmake-get-raw estate mode index selector)) ! (getter ! (let ((args (car getter)) ! (expr (cadr getter))) ! (rtl-c-expr mode expr ! (if (= (length args) 0) nil ! (list (list (car args) 'UINT index))) ! #:rtl-cover-fns? #t ! #:output-language (estate-output-language estate)))) ! (else ! (send hw 'cxmake-get estate mode index selector))))) ! (logit 4 " cxmake-get self=" (obj:name self) " mode=" (obj:name mode) " index=" (obj:name index) " selector=" selector "\n") ! ! (if delayval ! (cx:make mode (string-append "lookahead (" ! (number->string delayval) ! ", tick, " ! "buf." name "_writes, " ! (cx:c def-val) ! idx-args ")")) ! 
def-val))) ) *************** *** 1056,1071 **** (send (op:type op) 'gen-set-quiet estate mode index selector newval) ) ! (define (-op-gen-set-quiet-parallel op estate mode index selector newval) ! (string-append ! (if (op-save-index? op) ! (string-append " " -par-operand-macro " (" (-op-index-name op) ")" ! " = " (-gen-hw-index index estate) ";\n") ! "") ! " " ! -par-operand-macro " (" (gen-sym op) ")" ! " = " (cx:c newval) ";\n") ! ) (define (-op-gen-set-trace op estate mode index selector newval) (string-append --- 1028,1036 ---- (send (op:type op) 'gen-set-quiet estate mode index selector newval) ) ! (define (-op-gen-delayed-set-quiet op estate mode index selector newval) ! (-op-gen-delayed-set-maybe-trace op estate mode index selector newval #f)) ! (define (-op-gen-set-trace op estate mode index selector newval) (string-append *************** *** 1129,1143 **** " }\n") ) ! (define (-op-gen-set-trace-parallel op estate mode index selector newval) ! (string-append ! " {\n" ! " " (mode:c-type mode) " opval = " (cx:c newval) ";\n" ! (if (op:cond? op) ! (string-append " written |= (1ULL << " ! (number->string (op:num op)) ! ");\n") ! "") ; TRACE_RESULT_ (cpu, abuf, hwnum, opnum, value); ; For each insn record array of operand numbers [or indices into ; operand instance table]. --- 1094,1134 ---- " }\n") ) ! (define (-op-gen-delayed-set-trace op estate mode index selector newval) ! (-op-gen-delayed-set-maybe-trace op estate mode index selector newval #t)) ! ! (define (-op-gen-delayed-set-maybe-trace op estate mode index selector newval do-trace?) ! (let* ((pad " ") ! (hw (op:type op)) ! (delayval (op:delay op)) ! (md (mode:c-type mode)) ! (name (if ! (eq? (obj:name hw) 'h-memory) ! (string-append md "_memory") ! (gen-c-symbol (obj:name hw)))) ! (val (cx:c newval)) ! (idx (if index (-gen-hw-index index estate) "")) ! (idx-args (if (equal? idx "") "" (string-append ", " idx))) ! ) ! ! (string-append ! " {\n" ! ! (if delayval ! ! 
;; delayed write: push it to the appropriate buffer ! (string-append ! pad md " opval = " val ";\n" ! pad "buf." name "_writes [(tick + " (number->string delayval) ! ") % @prefix@::pipe_sz].push (@prefix@::write<" md ">(pc, opval" idx-args "));\n") ! ! ;; else, uh, we should never have been called! ! (error "-op-gen-delayed-set-maybe-trace called on non-delayed operand")) ! ! ! (if do-trace? ! ! (string-append ; TRACE_RESULT_ (cpu, abuf, hwnum, opnum, value); ; For each insn record array of operand numbers [or indices into ; operand instance table]. *************** *** 1169,1183 **** "(USI) " "")) "opval << dec << \" \";\n" - (if (op-save-index? op) - (string-append " " -par-operand-macro " (" (-op-index-name op) ")" - " = " (-gen-hw-index index estate) ";\n") - "") - " " -par-operand-macro " (" (gen-sym op) ")" - " = opval;\n" " }\n") ! ) ! ; Return C code to set the value of an operand. ; NEWVAL is a object of the value to store. --- 1160,1168 ---- "(USI) " "")) "opval << dec << \" \";\n" " }\n") ! ;; else no tracing is emitted ! "")))) ; Return C code to set the value of an operand. ; NEWVAL is a object of the value to store. *************** *** 1196,1203 **** (selector (if selector selector (op:selector self)))) (cond ((obj-has-attr? self 'RAW) (send (op:type self) 'gen-set-quiet-raw estate mode index selector newval)) ! ((with-parallel-write?) ! (-op-gen-set-quiet-parallel self estate mode index selector newval)) (else (-op-gen-set-quiet self estate mode index selector newval))))) ) --- 1181,1188 ---- (selector (if selector selector (op:selector self)))) (cond ((obj-has-attr? self 'RAW) (send (op:type self) 'gen-set-quiet-raw estate mode index selector newval)) ! ((op:delay self) ! (-op-gen-delayed-set-quiet self estate mode index selector newval)) (else (-op-gen-set-quiet self estate mode index selector newval))))) ) *************** *** 1219,1244 **** (selector (if selector selector (op:selector self)))) (cond ((obj-has-attr? 
self 'RAW) (send (op:type self) 'gen-set-quiet-raw estate mode index selector newval)) ! ((with-parallel-write?) ! (-op-gen-set-trace-parallel self estate mode index selector newval)) (else (-op-gen-set-trace self estate mode index selector newval))))) ) - ; Define and undefine C macros to tuck away details of instruction format used - ; in the parallel execution functions. See gen-define-field-macro for a - ; similar thing done for extraction/semantic functions. - - (define (gen-define-parallel-operand-macro sfmt) - (string-append "#define " -par-operand-macro "(f) " - "par_exec->operands." - (gen-sym sfmt) - ".f\n") - ) - - (define (gen-undef-parallel-operand-macro sfmt) - (string-append "#undef " -par-operand-macro "\n") - ) ; Operand profiling and parallel execution support. --- 1204,1215 ---- (selector (if selector selector (op:selector self)))) (cond ((obj-has-attr? self 'RAW) (send (op:type self) 'gen-set-quiet-raw estate mode index selector newval)) ! ((op:delay self) ! (-op-gen-delayed-set-trace self estate mode index selector newval)) (else (-op-gen-set-trace self estate mode index selector newval))))) ) ; Operand profiling and parallel execution support. Index: cgen/utils.scm =================================================================== RCS file: /cvs/src/src/cgen/utils.scm,v retrieving revision 1.17 diff -c -p -r1.17 utils.scm *** cgen/utils.scm 10 May 2005 10:21:02 -0000 1.17 --- cgen/utils.scm 15 Jun 2005 20:58:26 -0000 *************** *** 563,568 **** --- 563,584 ---- (reverse! (list-drop n (reverse l))) ) + ;; left fold + (define (foldl kons accum lis) + (if (null? lis) accum + (foldl kons (kons accum (car lis)) (cdr lis)))) + + ;; right fold + (define (foldr kons knil lis) + (if (null? lis) knil + (kons (car lis) (foldr kons knil (cdr lis))))) + + ;; filter list on predicate + (define (filter p ls) + (foldr (lambda (x a) (if (p x) (cons x a) a)) + '() ls)) + + ; APL's +\ operation on a vector of numbers. 
(define (plus-scan l) *************** *** 635,646 **** ; Return intersection of two lists. ! (define (intersection l1 l2) ! (cond ((null? l1) l1) ! ((null? l2) l2) ! ((memq (car l1) l2) (cons (car l1) (intersection (cdr l1) l2))) ! (else (intersection (cdr l1) l2))) ! ) ; Return a count of the number of elements of list L1 that are in list L2. ; Uses memq. --- 651,663 ---- ; Return intersection of two lists. ! (define (intersection a b) ! (foldl (lambda (l e) (if (memq e a) (cons e l) l)) '() b)) ! ! ; Return union of two lists. ! ! (define (union a b) ! (foldl (lambda (l e) (if (memq e l) l (cons e l))) a b)) ; Return a count of the number of elements of list L1 that are in list L2. ; Uses memq. Index: cgen/doc/rtl.texi =================================================================== RCS file: /cvs/src/src/cgen/doc/rtl.texi,v retrieving revision 1.21 diff -c -p -r1.21 rtl.texi *** cgen/doc/rtl.texi 10 Jun 2003 22:09:43 -0000 1.21 --- cgen/doc/rtl.texi 15 Jun 2005 20:58:26 -0000 *************** Indicate that there are @samp{num} delay *** 2271,2276 **** --- 2271,2310 ---- @samp{expr}. When using this rtx in instruction semantics, CGEN will infer that the instruction has the DELAY-SLOT attribute. + @item (delay num expr) + In older "sim" simulators, indicates that there are @samp{num} delay + slots in the processing of @samp{expr}. When using this rtx in instruction + semantics, CGEN will infer that the instruction has the DELAY-SLOT + attribute. + + In newer "sid" simulators, evaluates to the writeback queue for hardware + operand @samp{expr}, at @samp{num} instruction cycles in the + future. @samp{expr} @emph{must} be a hardware operand in this case. + + For example, @code{(set (delay 3 pc) (+ pc 1))} will schedule a write to + the @samp{pc} register in the writeback phase of the 3rd instruction + after the current one.
Alternatively, @code{(set gr1 (delay 3 gr2))} will + immediately update the @samp{gr1} register with the @emph{latest write} + to the @samp{gr2} register scheduled between the present and 3 + instructions in the future. @code{(delay 0 ...)} refers to the + writeback phase of the current instruction. + + This effect is modeled with a circular buffer of "write stacks" for each + hardware element (register banks get a single stack). The size of the + circular buffer is calculated from the uses of @code{(delay ...)} + rtxs. When a delayed write occurs, the simulator pushes the write onto + the appropriate write stack in the "future" of the circular buffer for + the written-to hardware element. At the end of each instruction cycle, + the simulator executes all writes in all write stacks for the time slice + just ending. When a delayed read (essentially a pipeline bypass) occurs, + the simulator looks ahead in the circular buffer for any writes + scheduled in the future write stack. If it doesn't find one, it + progressively backs off towards the "current" instruction cycle's write + stack, and if it still finds no scheduled writes then it returns the + current state of the CPU. Thus while delayed writes are fast, delayed + reads are potentially slower in a simulator with long pipelines and very + large register banks. + @item (annul yes?) @c FIXME: put annul into the glossary. Annul the following instruction if @samp{yes?} is non-zero. This rtx is